diff options
Diffstat (limited to 'src')
95 files changed, 44182 insertions, 320 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 0d49bcd19ed..da638a811fb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,6 +53,10 @@ EXTRA_DIST = \ AM_CFLAGS = $(VISIBILITY_CFLAGS) AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS) +if HAVE_VULKAN +SUBDIRS += vulkan +endif + AM_CPPFLAGS = \ -I$(top_srcdir)/include/ \ -I$(top_srcdir)/src/mapi/ \ diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore index dda423f83db..e80f8af6bfc 100644 --- a/src/glsl/.gitignore +++ b/src/glsl/.gitignore @@ -4,6 +4,7 @@ glsl_parser.cpp glsl_parser.h glsl_parser.output glsl_test +spirv2nir subtest-cr/ subtest-lf/ subtest-cr-lf/ diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 98dcb37fc74..2ab40506e97 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -75,7 +75,7 @@ check_PROGRAMS = \ tests/sampler-types-test \ tests/uniform-initializer-test -noinst_PROGRAMS = glsl_compiler +noinst_PROGRAMS = glsl_compiler spirv2nir tests_blob_test_SOURCES = \ tests/blob_test.c @@ -156,6 +156,16 @@ glsl_compiler_LDADD = \ $(top_builddir)/src/libglsl_util.la \ $(PTHREAD_LIBS) +spirv2nir_SOURCES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + nir/spirv2nir.c + +spirv2nir_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + glsl_test_SOURCES = \ standalone_scaffolding.cpp \ test.cpp \ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0b77244ac03..b491ad4d36f 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -61,6 +61,7 @@ NIR_FILES = \ nir/nir_remove_dead_variables.c \ nir/nir_search.c \ nir/nir_search.h \ + nir/nir_spirv.h \ nir/nir_split_var_copies.c \ nir/nir_sweep.c \ nir/nir_to_ssa.c \ @@ -70,6 +71,8 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h \ nir/nir_types.cpp \ + nir/spirv_to_nir.c \ + nir/spirv_glsl450_to_nir.c \ $(NIR_GENERATED_FILES) # libglsl diff --git a/src/glsl/ast.h b/src/glsl/ast.h index d8c6cea7832..eb6d8461671 100644 --- a/src/glsl/ast.h +++ 
b/src/glsl/ast.h @@ -527,6 +527,9 @@ struct ast_type_qualifier { unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ /** \} */ + /** \name Vulkan qualifiers */ + unsigned vk_set:1; + /** \name Layout qualifiers for GL_ARB_tessellation_shader */ /** \{ */ /* tess eval input layout */ @@ -640,6 +643,11 @@ struct ast_type_qualifier { glsl_base_type image_base_type; /** + * Vulkan descriptor set + */ + int set; + + /** * Return true if and only if an interpolation qualifier is present. */ bool has_interpolation() const; diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 06cd6a5ec59..fa2c09d2697 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -1019,6 +1019,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_SUBROUTINE: /* I assume a comparison of a struct containing a sampler just @@ -2742,7 +2743,16 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; } - if (qual->flags.q.explicit_location) { + if (qual->flags.q.vk_set) { + if (!qual->flags.q.explicit_binding) + _mesa_glsl_error(loc, state, + "Vulkan descriptor set layout requires both set " + "and binding qualifiers"); + + var->data.vk_set = true; + var->data.set = qual->set; + var->data.binding = qual->binding; + } else if (qual->flags.q.explicit_location) { validate_explicit_location(qual, var, state, loc); } else if (qual->flags.q.explicit_index) { _mesa_glsl_error(loc, state, "explicit index requires explicit location"); @@ -6228,6 +6238,10 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; + var->data.vk_set = this->layout.flags.q.vk_set; + var->data.set = this->layout.set; + 
var->data.binding = this->layout.binding; + state->symbols->add_variable(var); instructions->push_tail(var); } @@ -6301,6 +6315,10 @@ ast_interface_block::hir(exec_list *instructions, var->data.explicit_binding = this->layout.flags.q.explicit_binding; var->data.binding = this->layout.binding; + var->data.vk_set = this->layout.flags.q.vk_set; + var->data.set = this->layout.set; + var->data.binding = this->layout.binding; + state->symbols->add_variable(var); instructions->push_tail(var); } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index a4671e203e2..892122af03d 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -297,6 +297,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, this->image_base_type = q.image_base_type; } + if (q.flags.q.vk_set) { + this->set = q.set; + this->binding = q.binding; + } + return true; } diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 2b0c8bd8c6f..97648c15ccc 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1459,14 +1459,16 @@ layout_qualifier_id: } } - if ((state->has_420pack() || - state->has_atomic_counters() || - state->has_shader_storage_buffer_objects()) && - match_layout_qualifier("binding", $1, state) == 0) { + if (match_layout_qualifier("binding", $1, state) == 0) { $$.flags.q.explicit_binding = 1; $$.binding = $3; } + if (match_layout_qualifier("set", $1, state) == 0) { + $$.flags.q.vk_set = 1; + $$.set = $3; + } + if (state->has_atomic_counters() && match_layout_qualifier("offset", $1, state) == 0) { $$.flags.q.explicit_offset = 1; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 46896d77999..ae2f35697fb 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -87,6 +87,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->extensions = &ctx->Extensions; + this->ARB_compute_shader_enable = true; + this->Const.MaxLights = ctx->Const.MaxLights; 
this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 755618ac28b..76814e894ed 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -32,6 +32,7 @@ mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP; hash_table *glsl_type::array_types = NULL; hash_table *glsl_type::record_types = NULL; hash_table *glsl_type::interface_types = NULL; +hash_table *glsl_type::function_types = NULL; hash_table *glsl_type::subroutine_types = NULL; void *glsl_type::mem_ctx = NULL; @@ -162,6 +163,39 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, mtx_unlock(&glsl_type::mutex); } +glsl_type::glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) : + gl_type(0), + base_type(GLSL_TYPE_FUNCTION), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_params) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + + this->fields.parameters = rzalloc_array(this->mem_ctx, + glsl_function_param, num_params + 1); + + /* We store the return type as the first parameter */ + this->fields.parameters[0].type = return_type; + this->fields.parameters[0].in = false; + this->fields.parameters[0].out = true; + + /* We store the i'th parameter in slot i+1 */ + for (i = 0; i < length; i++) { + this->fields.parameters[i + 1].type = params[i].type; + this->fields.parameters[i + 1].in = params[i].in; + this->fields.parameters[i + 1].out = params[i].out; + } + + mtx_unlock(&glsl_type::mutex); +} + glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), @@ -900,6 +934,74 @@ glsl_type::get_subroutine_instance(const char *subroutine_name) } +static bool +function_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = 
(glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + if (key1->length != key2->length) + return 1; + + return memcmp(key1->fields.parameters, key2->fields.parameters, + (key1->length + 1) * sizeof(*key1->fields.parameters)); +} + + +static uint32_t +function_key_hash(const void *a) +{ + const glsl_type *const key = (glsl_type *) a; + char hash_key[128]; + unsigned size = 0; + + size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); + + for (unsigned i = 0; i < key->length; i++) { + if (size >= sizeof(hash_key)) + break; + + size += snprintf(& hash_key[size], sizeof(hash_key) - size, + "%p", (void *) key->fields.structure[i].type); + } + + return _mesa_hash_string(hash_key); +} + +const glsl_type * +glsl_type::get_function_instance(const glsl_type *return_type, + const glsl_function_param *params, + unsigned num_params) +{ + const glsl_type key(return_type, params, num_params); + + mtx_lock(&glsl_type::mutex); + + if (function_types == NULL) { + function_types = _mesa_hash_table_create(NULL, function_key_hash, + function_key_compare); + } + + struct hash_entry *entry = _mesa_hash_table_search(function_types, &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(return_type, params, num_params); + mtx_lock(&glsl_type::mutex); + + _mesa_hash_table_insert(function_types, t, (void *) t); + } + + const glsl_type *t = (const glsl_type *)entry->data; + + assert(t->base_type == GLSL_TYPE_FUNCTION); + assert(t->length == num_params); + + mtx_unlock(&glsl_type::mutex); + + return t; +} + + const glsl_type * glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) { @@ -1029,6 +1131,7 @@ glsl_type::component_slots() const return 1; case GLSL_TYPE_SUBROUTINE: return 1; + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_VOID: @@ -1402,6 +1505,7 @@ glsl_type::count_attribute_slots() const case GLSL_TYPE_ARRAY: return this->length * 
this->fields.array->count_attribute_slots(); + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index e7c73dac3c3..28e2e93a305 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -56,6 +56,7 @@ enum glsl_base_type { GLSL_TYPE_IMAGE, GLSL_TYPE_ATOMIC_UINT, GLSL_TYPE_STRUCT, + GLSL_TYPE_FUNCTION, GLSL_TYPE_INTERFACE, GLSL_TYPE_ARRAY, GLSL_TYPE_VOID, @@ -179,7 +180,7 @@ struct glsl_type { */ union { const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ + struct glsl_function_param *parameters; /**< Parameters to function. */ struct glsl_struct_field *structure; /**< List of struct fields. */ } fields; @@ -265,6 +266,12 @@ struct glsl_type { const char *block_name); /** + * Get the instance of a function type + */ + static const glsl_type *get_function_instance(const struct glsl_type *return_type, + const glsl_function_param *parameters, + unsigned num_params); + /** * Get the instance of an subroutine type */ static const glsl_type *get_subroutine_instance(const char *subroutine_name); @@ -689,6 +696,10 @@ private: glsl_type(const glsl_struct_field *fields, unsigned num_fields, enum glsl_interface_packing packing, const char *name); + /** Constructor for interface types */ + glsl_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params); + /** Constructor for array types */ glsl_type(const glsl_type *array, unsigned length); @@ -704,6 +715,9 @@ private: /** Hash table containing the known interface types. */ static struct hash_table *interface_types; + /** Hash table containing the known function types. */ + static struct hash_table *function_types; + /** Hash table containing the known subroutine types. 
*/ static struct hash_table *subroutine_types; @@ -734,6 +748,10 @@ private: /*@}*/ }; +#undef DECL_TYPE +#undef STRUCT_TYPE +#endif /* __cplusplus */ + struct glsl_struct_field { const struct glsl_type *type; const char *name; @@ -782,6 +800,7 @@ struct glsl_struct_field { */ int stream; +#ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), @@ -794,6 +813,14 @@ struct glsl_struct_field { { /* empty */ } +#endif +}; + +struct glsl_function_param { + const struct glsl_type *type; + + bool in; + bool out; }; static inline unsigned int @@ -802,8 +829,4 @@ glsl_align(unsigned int a, unsigned int align) return (a + align - 1) / align * align; } -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - #endif /* GLSL_TYPES_H */ diff --git a/src/glsl/ir.h b/src/glsl/ir.h index ede8caa6e47..7aac9af9001 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -689,6 +689,11 @@ public: unsigned explicit_index:1; /** + * Do we have a Vulkan (group, index) qualifier for this variable? + */ + unsigned vk_set:1; + + /** * Was an initial binding explicitly set in the shader? * * If so, constant_value contains an integer ir_constant representing the @@ -757,8 +762,10 @@ public: * \note * The GLSL spec only allows the values 0 or 1 for the index in \b dual * source blending. + * + * This is now also used for the Vulkan descriptor set index. */ - unsigned index:1; + int16_t index; /** * \brief Layout qualifier for gl_FragDepth. @@ -807,6 +814,11 @@ public: int16_t binding; /** + * Vulkan descriptor set for the resource. + */ + int16_t set; + + /** * Storage location of the base of this variable * * The precise meaning of this field depends on the nature of the variable. 
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index a8fac183a8d..4edf70dba5d 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -363,6 +363,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const return c; } + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp index 510294783a0..981c1f75571 100644 --- a/src/glsl/link_uniform_block_active_visitor.cpp +++ b/src/glsl/link_uniform_block_active_visitor.cpp @@ -54,6 +54,11 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) b->binding = 0; } + if (var->data.vk_set) { + b->set = var->data.set; + b->index = var->data.index; + } + _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b); return b; } else { diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h index b663a884db4..d8aefd69991 100644 --- a/src/glsl/link_uniform_block_active_visitor.h +++ b/src/glsl/link_uniform_block_active_visitor.h @@ -35,6 +35,8 @@ struct link_uniform_block_active { unsigned num_array_elements; unsigned binding; + unsigned set; + unsigned index; bool has_instance_name; bool has_binding; diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index 4df39e200d5..b80e5736f6b 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -293,6 +293,8 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + blocks[i].Set = b->set; + blocks[i].Binding = b->binding; blocks[i].IsShaderStorage = b->is_shader_storage; i++; @@ -313,6 +315,8 @@ link_uniform_blocks(void *mem_ctx, blocks[i].NumUniforms = (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); + blocks[i].Set = b->set; + blocks[i].Binding = b->binding; 
blocks[i].IsShaderStorage = b->is_shader_storage; i++; diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index d61ae91f4ad..c482fbfdfb2 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -88,6 +88,7 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_IMAGE: case GLSL_TYPE_ATOMIC_UINT: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_SUBROUTINE: case GLSL_TYPE_ERROR: diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 77327b6c74f..27dabd3b8f2 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -43,7 +43,7 @@ namespace { class nir_visitor : public ir_visitor { public: - nir_visitor(nir_shader *shader, gl_shader_stage stage); + nir_visitor(nir_shader *shader, struct gl_shader *sh, gl_shader_stage stage); ~nir_visitor(); virtual void visit(ir_variable *); @@ -83,6 +83,8 @@ private: bool supports_ints; + struct gl_shader *sh; + nir_shader *shader; gl_shader_stage stage; nir_function_impl *impl; @@ -133,7 +135,7 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) { nir_shader *shader = nir_shader_create(NULL, options); - nir_visitor v1(shader, sh->Stage); + nir_visitor v1(shader, sh, sh->Stage); nir_function_visitor v2(&v1); v2.run(sh->ir); visit_exec_list(sh->ir, &v1); @@ -141,10 +143,12 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) return shader; } -nir_visitor::nir_visitor(nir_shader *shader, gl_shader_stage stage) +nir_visitor::nir_visitor(nir_shader *shader, struct gl_shader *sh, + gl_shader_stage stage) { this->supports_ints = shader->options->native_integers; this->shader = shader; + this->sh = sh; this->stage = stage; this->is_global = true; this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, @@ -325,6 +329,7 @@ nir_visitor::visit(ir_variable *ir) } var->data.index = 
ir->data.index; + var->data.descriptor_set = ir->data.set; var->data.binding = ir->data.binding; /* XXX Get rid of buffer_index */ var->data.atomic.buffer_index = ir->data.binding; @@ -990,11 +995,21 @@ nir_visitor::visit(ir_expression *ir) } else { op = nir_intrinsic_load_ubo_indirect; } + + ir_constant *const_block = ir->operands[0]->as_constant(); + assert(const_block && "can't figure out descriptor set index"); + unsigned index = const_block->value.u[0]; + unsigned set = sh->UniformBlocks[index].Set; + unsigned binding = sh->UniformBlocks[index].Binding; + nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ - load->const_index[1] = 1; /* number of vec4's */ - load->src[0] = evaluate_rvalue(ir->operands[0]); + load->const_index[0] = set; + load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */ + nir_load_const_instr *load_binding = nir_load_const_instr_create(shader, 1); + load_binding->value.u[0] = binding; + nir_instr_insert_after_cf_list(this->cf_node_list, &load_binding->instr); + load->src[0] = nir_src_for_ssa(&load_binding->def); if (!const_index) load->src[1] = evaluate_rvalue(ir->operands[1]); add_instr(&load->instr, ir->type->vector_elements); diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 222a219d0e6..70af06e6971 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -301,6 +301,7 @@ typedef struct { * * For array types, this represents the binding point for the first element. 
*/ + int descriptor_set; int binding; /** @@ -515,7 +516,11 @@ typedef struct nir_src { bool is_ssa; } nir_src; -#define NIR_SRC_INIT (nir_src) { { NULL } } +#ifdef __cplusplus +# define NIR_SRC_INIT nir_src() +#else +# define NIR_SRC_INIT (nir_src) { { NULL } } +#endif #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@ -538,7 +543,11 @@ typedef struct { bool is_ssa; } nir_dest; -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#ifdef __cplusplus +# define NIR_DEST_INIT nir_dest() +#else +# define NIR_DEST_INIT (nir_dest) { { { NULL } } } +#endif #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) @@ -777,6 +786,15 @@ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) +/** Returns the tail of a deref chain */ +static inline nir_deref * +nir_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + return deref; +} + typedef struct { nir_instr instr; @@ -971,6 +989,9 @@ typedef struct { /* gather component selector */ unsigned component : 2; + /* The descriptor set containing this texture */ + unsigned sampler_set; + /** The sampler index * * If this texture instruction has a nir_tex_src_sampler_offset source, @@ -1661,6 +1682,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, gl_shader_stage stage); +void nir_lower_samplers_for_vk(nir_shader *shader); void nir_lower_system_values(nir_shader *shader); void nir_lower_tex_projector(nir_shader *shader); diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 9223e838095..7d449262585 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -28,6 +28,10 @@ struct exec_list; typedef struct 
nir_builder { struct exec_list *cf_node_list; + + nir_block *before_block; + nir_block *after_block; + nir_instr *before_instr; nir_instr *after_instr; @@ -48,6 +52,30 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_block = NULL; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_before_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = block; + build->after_block = NULL; + build->before_instr = NULL; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_after_block(nir_builder *build, + nir_block *block) +{ + build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = block; build->before_instr = NULL; build->after_instr = NULL; } @@ -56,6 +84,8 @@ static inline void nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = before_instr; build->after_instr = NULL; } @@ -64,6 +94,8 @@ static inline void nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) { build->cf_node_list = NULL; + build->before_block = NULL; + build->after_block = NULL; build->before_instr = NULL; build->after_instr = after_instr; } @@ -73,6 +105,10 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { if (build->cf_node_list) { nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else if (build->before_block) { + nir_instr_insert_before_block(build->before_block, instr); + } else if (build->after_block) { + nir_instr_insert_after_block(build->after_block, instr); } else if (build->before_instr) { nir_instr_insert_before(build->before_instr, instr); } else { @@ -240,6 +276,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], 
nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index bc6e6b8f498..64861300b55 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -139,11 +139,12 @@ SYSTEM_VALUE(sample_mask_in, 1) SYSTEM_VALUE(invocation_id, 1) /* - * The first and only index is the base address to load from. Indirect - * loads have an additional register input, which is added to the constant - * address to compute the final address to load from. For UBO's (and - * SSBO's), the first source is the (possibly constant) UBO buffer index - * and the indirect (if it exists) is the second source. + * The last index is the base address to load from. Indirect loads have an + * additional register input, which is added to the constant address to + * compute the final address to load from. For UBO's (and SSBO's), the first + * source is the (possibly constant) UBO buffer index and the indirect (if it + * exists) is the second source, and the first index is the descriptor set + * index. * * For vector backends, the address is in terms of one vec4, and so each array * element is +4 scalar components from the previous array element. For scalar @@ -151,14 +152,14 @@ SYSTEM_VALUE(invocation_id, 1) * elements begin immediately after the previous array element. 
*/ -#define LOAD(name, extra_srcs, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \ +#define LOAD(name, extra_srcs, extra_indices, flags) \ + INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1 + extra_indices, flags) \ INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, 1, flags) + true, 0, 0, 1 + extra_indices, flags) -LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(uniform, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(input, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* LOAD(ssbo, 1, 0) */ /* diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 71bfd347c1a..6a4494d5fd2 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -108,6 +108,7 @@ type_size_scalar(const struct glsl_type *type) return 0; case GLSL_TYPE_IMAGE: return 0; + case GLSL_TYPE_FUNCTION: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_DOUBLE: diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 7a0b0a09ffe..9a9cdd16a9a 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -35,6 +35,30 @@ extern "C" { #include "program/program.h" } +static void +add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) +{ + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move 
the source over to being a + * first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, + indirect); +} + static unsigned get_sampler_index(const struct gl_shader_program *shader_program, gl_shader_stage stage, const char *name) @@ -176,3 +200,49 @@ nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_pr lower_impl(overload->impl, shader_program, stage); } } + +static bool +lower_samplers_for_vk_block(nir_block *block, void *data) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + assert(tex->sampler); + + tex->sampler_set = tex->sampler->var->data.descriptor_set; + tex->sampler_index = tex->sampler->var->data.binding; + + if (tex->sampler->deref.child) { + assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); + nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); + + /* Only one-level arrays are allowed in vulkan */ + assert(arr->deref.child == NULL); + + tex->sampler_index += arr->base_offset; + if (arr->deref_array_type == nir_deref_array_type_indirect) { + add_indirect_to_tex(tex, arr->indirect); + nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); + + tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); + } + } + + tex->sampler = NULL; + } + + return true; +} + +extern "C" void +nir_lower_samplers_for_vk(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); + } + } +} diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c index 21672901f04..98c107aa50e 100644 --- a/src/glsl/nir/nir_lower_var_copies.c +++ b/src/glsl/nir/nir_lower_var_copies.c @@ -53,17 +53,6 @@ deref_next_wildcard_parent(nir_deref *deref) 
return NULL; } -/* Returns the last deref in the chain. - */ -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child) - deref = deref->child; - - return deref; -} - /* This function recursively walks the given deref chain and replaces the * given copy instruction with an equivalent sequence load/store * operations. @@ -121,8 +110,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, } else { /* In this case, we have no wildcards anymore, so all we have to do * is just emit the load and store operations. */ - src_tail = get_deref_tail(src_tail); - dest_tail = get_deref_tail(dest_tail); + src_tail = nir_deref_tail(src_tail); + dest_tail = nir_deref_tail(dest_tail); assert(src_tail->type == dest_tail->type); diff --git a/src/glsl/nir/nir_spirv.h b/src/glsl/nir/nir_spirv.h new file mode 100644 index 00000000000..3254f10a88d --- /dev/null +++ b/src/glsl/nir/nir_spirv.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#pragma once + +#ifndef _NIR_SPIRV_H_ +#define _NIR_SPIRV_H_ + +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +nir_shader *spirv_to_nir(const uint32_t *words, size_t word_count, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif + +#endif /* _NIR_SPIRV_H_ */ diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c index fc72c078c77..5c163b59819 100644 --- a/src/glsl/nir/nir_split_var_copies.c +++ b/src/glsl/nir/nir_split_var_copies.c @@ -66,14 +66,6 @@ struct split_var_copies_state { void *dead_ctx; }; -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child != NULL) - deref = deref->child; - return deref; -} - /* Recursively constructs deref chains to split a copy instruction into * multiple (if needed) copy instructions with full-length deref chains. 
* External callers of this function should pass the tail and head of the @@ -225,8 +217,8 @@ split_var_copies_block(nir_block *block, void *void_state) nir_deref *dest_head = &intrinsic->variables[0]->deref; nir_deref *src_head = &intrinsic->variables[1]->deref; - nir_deref *dest_tail = get_deref_tail(dest_head); - nir_deref *src_tail = get_deref_tail(src_head); + nir_deref *dest_tail = nir_deref_tail(dest_head); + nir_deref *src_tail = nir_deref_tail(src_head); switch (glsl_get_base_type(src_tail->type)) { case GLSL_TYPE_ARRAY: diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index 62176f508a1..f3f3af97fde 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -70,6 +70,18 @@ glsl_get_struct_field(const glsl_type *type, unsigned index) return type->fields.structure[index].type; } +const glsl_type * +glsl_get_function_return_type(const glsl_type *type) +{ + return type->fields.parameters[0].type; +} + +const glsl_function_param * +glsl_get_function_param(const glsl_type *type, unsigned index) +{ + return &type->fields.parameters[index + 1]; +} + const struct glsl_type * glsl_get_column_type(const struct glsl_type *type) { @@ -112,6 +124,20 @@ glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index) return type->fields.structure[index].name; } +glsl_sampler_dim +glsl_get_sampler_dim(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_sampler_dim)type->sampler_dimensionality; +} + +glsl_base_type +glsl_get_sampler_result_type(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_base_type)type->sampler_type; +} + bool glsl_type_is_void(const glsl_type *type) { @@ -131,11 +157,49 @@ glsl_type_is_scalar(const struct glsl_type *type) } bool +glsl_type_is_vector_or_scalar(const struct glsl_type *type) +{ + return type->is_vector() || type->is_scalar(); +} + +bool glsl_type_is_matrix(const struct glsl_type *type) { return type->is_matrix(); } +bool 
+glsl_type_is_array(const struct glsl_type *type) +{ + return type->is_array(); +} + +bool +glsl_type_is_struct(const struct glsl_type *type) +{ + return type->is_record() || type->is_interface(); +} + +bool +glsl_type_is_sampler(const struct glsl_type *type) +{ + return type->is_sampler(); +} + +bool +glsl_sampler_type_is_shadow(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_shadow; +} + +bool +glsl_sampler_type_is_array(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return type->sampler_array; +} + const glsl_type * glsl_void_type(void) { @@ -149,13 +213,79 @@ glsl_float_type(void) } const glsl_type * +glsl_int_type(void) +{ + return glsl_type::int_type; +} + +const glsl_type * +glsl_uint_type(void) +{ + return glsl_type::uint_type; +} + +const glsl_type * +glsl_bool_type(void) +{ + return glsl_type::bool_type; +} + +const glsl_type * glsl_vec4_type(void) { return glsl_type::vec4_type; } const glsl_type * +glsl_scalar_type(enum glsl_base_type base_type) +{ + return glsl_type::get_instance(base_type, 1, 1); +} + +const glsl_type * +glsl_vector_type(enum glsl_base_type base_type, unsigned components) +{ + assert(components > 1 && components <= 4); + return glsl_type::get_instance(base_type, components, 1); +} + +const glsl_type * +glsl_matrix_type(enum glsl_base_type base_type, unsigned rows, unsigned columns) +{ + assert(rows >= 1 && rows <= 4 && columns >= 1 && columns <= 4); + return glsl_type::get_instance(base_type, rows, columns); +} + +const glsl_type * glsl_array_type(const glsl_type *base, unsigned elements) { return glsl_type::get_array_instance(base, elements); } + +const glsl_type * +glsl_struct_type(const glsl_struct_field *fields, + unsigned num_fields, const char *name) +{ + return glsl_type::get_record_instance(fields, num_fields, name); +} + +const struct glsl_type * +glsl_sampler_type(enum glsl_sampler_dim dim, bool is_shadow, bool is_array, + enum glsl_base_type base_type) 
+{ + return glsl_type::get_sampler_instance(dim, is_shadow, is_array, base_type); +} + +const glsl_type * +glsl_function_type(const glsl_type *return_type, + const glsl_function_param *params, unsigned num_params) +{ + return glsl_type::get_function_instance(return_type, params, num_params); +} + +const glsl_type * +glsl_transposed_type(const struct glsl_type *type) +{ + return glsl_type::get_instance(type->base_type, type->matrix_columns, + type->vector_elements); +} diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 276d4ad6234..a2fa7934e16 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -49,6 +49,12 @@ const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); const struct glsl_type *glsl_get_column_type(const struct glsl_type *type); +const struct glsl_type * +glsl_get_function_return_type(const struct glsl_type *type); + +const struct glsl_function_param * +glsl_get_function_param(const struct glsl_type *type, unsigned index); + enum glsl_base_type glsl_get_base_type(const struct glsl_type *type); unsigned glsl_get_vector_elements(const struct glsl_type *type); @@ -62,17 +68,44 @@ unsigned glsl_get_length(const struct glsl_type *type); const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); +bool glsl_type_is_vector_or_scalar(const struct glsl_type *type); bool glsl_type_is_matrix(const struct glsl_type *type); +bool glsl_type_is_array(const struct glsl_type *type); +bool glsl_type_is_struct(const struct glsl_type *type); +bool glsl_type_is_sampler(const struct glsl_type *type); +bool glsl_sampler_type_is_shadow(const struct glsl_type *type); +bool 
glsl_sampler_type_is_array(const struct glsl_type *type); const struct glsl_type *glsl_void_type(void); const struct glsl_type *glsl_float_type(void); +const struct glsl_type *glsl_int_type(void); +const struct glsl_type *glsl_uint_type(void); +const struct glsl_type *glsl_bool_type(void); + const struct glsl_type *glsl_vec4_type(void); +const struct glsl_type *glsl_scalar_type(enum glsl_base_type base_type); +const struct glsl_type *glsl_vector_type(enum glsl_base_type base_type, + unsigned components); +const struct glsl_type *glsl_matrix_type(enum glsl_base_type base_type, + unsigned rows, unsigned columns); const struct glsl_type *glsl_array_type(const struct glsl_type *base, unsigned elements); +const struct glsl_type *glsl_struct_type(const struct glsl_struct_field *fields, + unsigned num_fields, const char *name); +const struct glsl_type *glsl_sampler_type(enum glsl_sampler_dim dim, + bool is_shadow, bool is_array, + enum glsl_base_type base_type); +const struct glsl_type * glsl_function_type(const struct glsl_type *return_type, + const struct glsl_function_param *params, + unsigned num_params); + +const struct glsl_type *glsl_transposed_type(const struct glsl_type *type); #ifdef __cplusplus } diff --git a/src/glsl/nir/spirv.h b/src/glsl/nir/spirv.h new file mode 100644 index 00000000000..55bdcbee8b5 --- /dev/null +++ b/src/glsl/nir/spirv.h @@ -0,0 +1,808 @@ +/* +** Copyright (c) 2015 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Specification revision 31. 
+** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 99; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL = 3, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + 
SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthAny = 13, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeInputTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeInputQuads = 24, + SpvExecutionModeInputIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroupLocal = 4, + SpvStorageClassWorkgroupGlobal = 5, + SpvStorageClassPrivateGlobal = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + 
SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, +} SpvImageChannelOrder; + +typedef 
enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsOffsetShift = 3, + SpvImageOperandsOffsetsShift = 4, + SpvImageOperandsSampleShift = 5, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsOffsetMask = 0x00000008, + SpvImageOperandsOffsetsMask = 0x00000010, + SpvImageOperandsSampleMask = 0x00000020, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + 
SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeSVM = 6, + SpvFunctionParameterAttributeNoWrite = 7, + SpvFunctionParameterAttributeNoReadWrite = 8, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationSmooth = 12, + SpvDecorationNoperspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonwritable = 24, + SpvDecorationNonreadable = 25, + SpvDecorationUniform = 26, + SpvDecorationNoStaticUse = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + 
SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipVertex = 2, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragColor = 21, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInWorkgroupLinearId = 35, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + 
SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsRelaxedShift = 0, + SpvMemorySemanticsSequentiallyConsistentShift = 1, + SpvMemorySemanticsAcquireShift = 2, + SpvMemorySemanticsReleaseShift = 3, + SpvMemorySemanticsUniformMemoryShift = 4, + SpvMemorySemanticsSubgroupMemoryShift = 5, + SpvMemorySemanticsWorkgroupLocalMemoryShift = 6, + SpvMemorySemanticsWorkgroupGlobalMemoryShift = 7, + SpvMemorySemanticsAtomicCounterMemoryShift = 8, + SpvMemorySemanticsImageMemoryShift = 9, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsRelaxedMask = 0x00000001, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000002, + SpvMemorySemanticsAcquireMask = 0x00000004, + SpvMemorySemanticsReleaseMask = 0x00000008, + SpvMemorySemanticsUniformMemoryMask = 0x00000010, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000020, + SpvMemorySemanticsWorkgroupLocalMemoryMask = 0x00000040, + SpvMemorySemanticsWorkgroupGlobalMemoryMask = 0x00000080, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000100, + SpvMemorySemanticsImageMemoryMask = 0x00000200, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift 
= 0, + SpvMemoryAccessAlignedShift = 1, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityImageSRGBWrite = 16, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + 
SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + 
SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImageQueryDim = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpIMulExtended = 151, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + 
SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicIMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicIMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpAsyncGroupCopy = 259, + 
SpvOpWaitGroupEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, +} SpvOp; + +#endif // #ifndef spirv_H + diff --git a/src/glsl/nir/spirv2nir.c b/src/glsl/nir/spirv2nir.c new file mode 100644 index 00000000000..0eed23fbc3f --- /dev/null +++ b/src/glsl/nir/spirv2nir.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +/* + * A simple executable that opens a SPIR-V shader, converts it to NIR, and + * dumps out the result. This should be useful for testing the + * spirv_to_nir code. + */ + +#include "nir_spirv.h" + +#include <sys/mman.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> + +int main(int argc, char **argv) +{ + int fd = open(argv[1], O_RDONLY); + off_t len = lseek(fd, 0, SEEK_END); + + assert(len % 4 == 0); + size_t word_count = len / 4; + + const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0); + assert(map != NULL); + + nir_shader *shader = spirv_to_nir(map, word_count, NULL); + nir_print_shader(shader, stderr); +} diff --git a/src/glsl/nir/spirv_glsl450_to_nir.c b/src/glsl/nir/spirv_glsl450_to_nir.c new file mode 100644 index 00000000000..52b048820f3 --- /dev/null +++ b/src/glsl/nir/spirv_glsl450_to_nir.c @@ -0,0 +1,285 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to 
permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "spirv_to_nir_private.h" + +enum GLSL450Entrypoint { + Round = 0, + RoundEven = 1, + Trunc = 2, + Abs = 3, + Sign = 4, + Floor = 5, + Ceil = 6, + Fract = 7, + + Radians = 8, + Degrees = 9, + Sin = 10, + Cos = 11, + Tan = 12, + Asin = 13, + Acos = 14, + Atan = 15, + Sinh = 16, + Cosh = 17, + Tanh = 18, + Asinh = 19, + Acosh = 20, + Atanh = 21, + Atan2 = 22, + + Pow = 23, + Exp = 24, + Log = 25, + Exp2 = 26, + Log2 = 27, + Sqrt = 28, + InverseSqrt = 29, + + Determinant = 30, + MatrixInverse = 31, + + Modf = 32, // second argument needs the OpVariable = , not an OpLoad + Min = 33, + Max = 34, + Clamp = 35, + Mix = 36, + Step = 37, + SmoothStep = 38, + + FloatBitsToInt = 39, + FloatBitsToUint = 40, + IntBitsToFloat = 41, + UintBitsToFloat = 42, + + Fma = 43, + Frexp = 44, + Ldexp = 45, + + PackSnorm4x8 = 46, + PackUnorm4x8 = 47, + PackSnorm2x16 = 48, + PackUnorm2x16 = 49, + PackHalf2x16 = 50, + PackDouble2x32 = 51, + UnpackSnorm2x16 = 52, + UnpackUnorm2x16 = 53, + UnpackHalf2x16 = 54, + UnpackSnorm4x8 = 55, + UnpackUnorm4x8 = 56, + UnpackDouble2x32 = 57, + + Length = 58, + Distance = 59, + Cross = 60, + Normalize = 61, + Ftransform = 62, + 
FaceForward = 63, + Reflect = 64, + Refract = 65, + + UaddCarry = 66, + UsubBorrow = 67, + UmulExtended = 68, + ImulExtended = 69, + BitfieldExtract = 70, + BitfieldInsert = 71, + BitfieldReverse = 72, + BitCount = 73, + FindLSB = 74, + FindMSB = 75, + + InterpolateAtCentroid = 76, + InterpolateAtSample = 77, + InterpolateAtOffset = 78, + + Count +}; + +static nir_ssa_def* +build_length(nir_builder *b, nir_ssa_def *vec) +{ + switch (vec->num_components) { + case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec)); + case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec)); + case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec)); + case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec)); + default: + unreachable("Invalid number of components"); + } +} + +static void +handle_glsl450_alu(struct vtn_builder *b, enum GLSL450Entrypoint entrypoint, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = rzalloc(b, struct vtn_ssa_value); + val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 5; + nir_ssa_def *src[3]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 5])->def; + + nir_op op; + switch (entrypoint) { + case Round: op = nir_op_fround_even; break; /* TODO */ + case RoundEven: op = nir_op_fround_even; break; + case Trunc: op = nir_op_ftrunc; break; + case Abs: op = nir_op_fabs; break; + case Sign: op = nir_op_fsign; break; + case Floor: op = nir_op_ffloor; break; + case Ceil: op = nir_op_fceil; break; + case Fract: op = nir_op_ffract; break; + case Radians: + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 0.01745329251)); + return; + case Degrees: + val->ssa->def = nir_fmul(&b->nb, src[0], nir_imm_float(&b->nb, 57.2957795131)); + return; + case Sin: op = nir_op_fsin; break; + case Cos: op = nir_op_fcos; break; + case Tan: + val->ssa->def = nir_fdiv(&b->nb, nir_fsin(&b->nb, 
src[0]), + nir_fcos(&b->nb, src[0])); + return; + case Pow: op = nir_op_fpow; break; + case Exp2: op = nir_op_fexp2; break; + case Log2: op = nir_op_flog2; break; + case Sqrt: op = nir_op_fsqrt; break; + case InverseSqrt: op = nir_op_frsq; break; + + case Modf: op = nir_op_fmod; break; + case Min: op = nir_op_fmin; break; + case Max: op = nir_op_fmax; break; + case Mix: op = nir_op_flrp; break; + case Step: + val->ssa->def = nir_sge(&b->nb, src[1], src[0]); + return; + + case FloatBitsToInt: + case FloatBitsToUint: + case IntBitsToFloat: + case UintBitsToFloat: + /* Probably going to be removed from the final version of the spec. */ + val->ssa->def = src[0]; + return; + + case Fma: op = nir_op_ffma; break; + case Ldexp: op = nir_op_ldexp; break; + + /* Packing/Unpacking functions */ + case PackSnorm4x8: op = nir_op_pack_snorm_4x8; break; + case PackUnorm4x8: op = nir_op_pack_unorm_4x8; break; + case PackSnorm2x16: op = nir_op_pack_snorm_2x16; break; + case PackUnorm2x16: op = nir_op_pack_unorm_2x16; break; + case PackHalf2x16: op = nir_op_pack_half_2x16; break; + case UnpackSnorm4x8: op = nir_op_unpack_snorm_4x8; break; + case UnpackUnorm4x8: op = nir_op_unpack_unorm_4x8; break; + case UnpackSnorm2x16: op = nir_op_unpack_snorm_2x16; break; + case UnpackUnorm2x16: op = nir_op_unpack_unorm_2x16; break; + case UnpackHalf2x16: op = nir_op_unpack_half_2x16; break; + + case Length: + val->ssa->def = build_length(&b->nb, src[0]); + return; + case Distance: + val->ssa->def = build_length(&b->nb, nir_fsub(&b->nb, src[0], src[1])); + return; + case Normalize: + val->ssa->def = nir_fdiv(&b->nb, src[0], build_length(&b->nb, src[0])); + return; + + case UaddCarry: op = nir_op_uadd_carry; break; + case UsubBorrow: op = nir_op_usub_borrow; break; + case BitfieldExtract: op = nir_op_ubitfield_extract; break; /* TODO */ + case BitfieldInsert: op = nir_op_bitfield_insert; break; + case BitfieldReverse: op = nir_op_bitfield_reverse; break; + case BitCount: op = nir_op_bit_count; 
break; + case FindLSB: op = nir_op_find_lsb; break; + case FindMSB: op = nir_op_ufind_msb; break; /* TODO */ + + case Exp: + case Log: + case Clamp: + case Asin: + case Acos: + case Atan: + case Atan2: + case Sinh: + case Cosh: + case Tanh: + case Asinh: + case Acosh: + case Atanh: + case SmoothStep: + case Frexp: + case PackDouble2x32: + case UnpackDouble2x32: + case Cross: + case Ftransform: + case FaceForward: + case Reflect: + case Refract: + case UmulExtended: + case ImulExtended: + default: + unreachable("Unhandled opcode"); + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(val->ssa->type), val->name); + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +bool +vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count) +{ + switch ((enum GLSL450Entrypoint)ext_opcode) { + case Determinant: + case MatrixInverse: + case InterpolateAtCentroid: + case InterpolateAtSample: + case InterpolateAtOffset: + unreachable("Unhandled opcode"); + + default: + handle_glsl450_alu(b, (enum GLSL450Entrypoint)ext_opcode, words, count); + } + + return true; +} diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c new file mode 100644 index 00000000000..65a995c29de --- /dev/null +++ b/src/glsl/nir/spirv_to_nir.c @@ -0,0 +1,2978 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand ([email protected]) + * + */ + +#include "spirv_to_nir_private.h" +#include "nir_vla.h" + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < 
columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); +} + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp 
opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + int new_member = member; + + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + if (dec->member >= 0) { + assert(member == -1); + new_member = dec->member; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, new_member, dec->group, + cb, data); + } else { + cb(b, base_value, new_member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. 
+ */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; + switch (opcode) { + case SpvOpDecorationGroup: + vtn_push_value(b, target, vtn_value_type_undef); + break; + + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpDecorate: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[target]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element 
= src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationSmooth: + ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + 
val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: + val->type->type = glsl_int_type(); + break; + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + const struct glsl_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base)); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + val->type->type = glsl_array_type(array_element->type, w[3]); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + /* TODO: Handle decorators */ + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation = 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + fields[i].stream = -1; + } + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? 
val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + + assert(w[6] == 0 && "FIXME: Handl multi-sampled textures"); + assert(w[7] == 1 && "FIXME: Add support for non-sampled images"); + + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeRuntimeArray: + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case 
SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for 
constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_get_builtin_location(SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_out; + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + *mode = nir_var_shader_out; + break; + case SpvBuiltInClipVertex: + *location = VARYING_SLOT_CLIP_VERTEX; + *mode = nir_var_shader_out; + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? */ + *mode = nir_var_shader_in; + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + *location = SYSTEM_VALUE_VERTEX_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + *mode = nir_var_system_value; + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + *mode = nir_var_shader_out; + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + *mode = nir_var_shader_in; + break; + case SpvBuiltInSampleMask: + 
*location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? */ + *mode = nir_var_shader_in; + break; + case SpvBuiltInFragColor: + *location = FRAG_RESULT_COLOR; + *mode = nir_var_shader_out; + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + *mode = nir_var_shader_out; + break; + case SpvBuiltInHelperInvocation: + unreachable("unsupported builtin"); /* XXX */ + break; + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + /* these are constants, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + case SpvBuiltInLocalInvocationId: + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + unreachable("no compute shader support"); + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationSmooth: + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoperspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonwritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.explicit_location = true; + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + nir_variable_mode mode; + vtn_get_builtin_location(dec->literals[0], &var->data.location, + &mode); + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + b->builtins[dec->literals[0]] = var; + break; + } + case SpvDecorationNoStaticUse: + /* This can safely be ignored */ + break; + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case SpvDecorationCoherent: + case SpvDecorationNonreadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. 
*/ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var = b->builtins[builtin]; + + if (!var) { + var = ralloc(b->shader, nir_variable); + var->type = type; + + nir_variable_mode mode; + vtn_get_builtin_location(builtin, &var->data.location, &mode); + var->data.mode = mode; + var->name = ralloc_strdup(var, "builtin"); + + switch (mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + default: + unreachable("bad builtin mode"); + } + + b->builtins[builtin] = var; + } + + return var; +} + +static void +vtn_builtin_load(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, builtin); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + nir_ssa_dest_init(&load->instr, &load->dest, + glsl_get_vector_elements(val->type), NULL); + + load->variables[0] = nir_deref_var_create(load, var); + load->num_components = glsl_get_vector_elements(val->type); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; +} + +static void +vtn_builtin_store(struct vtn_builder *b, + struct vtn_ssa_value *val, + SpvBuiltIn builtin) +{ + 
assert(glsl_type_is_vector_or_scalar(val->type)); + + nir_variable *var = get_builtin_variable(b, val->type, builtin); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + + store->variables[0] = nir_deref_var_create(store, var); + store->num_components = glsl_get_vector_elements(val->type); + store->src[0] = nir_src_for_ssa(val->def); + nir_builder_instr_insert(&b->nb, &store->instr); +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, struct vtn_type *src_type, + nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + if (src_type->is_builtin) { + vtn_builtin_load(b, val, src_type->builtin); + return val; + } + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. 
+ */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->array_element, + &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, + src_type->members[i], + &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, struct vtn_type *dest_type, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value *src) +{ + if (dest_type->is_builtin) { + vtn_builtin_store(b, src, dest_type->builtin); + return; + } + + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else 
if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_type->array_element, dest_deref, + &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_type->members[i], dest_deref, + &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +static struct vtn_ssa_value * +_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, nir_ssa_def *binding, + unsigned offset, nir_ssa_def *indirect, + struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); + load->const_index[0] = set; + load->src[0] = nir_src_for_ssa(binding); + load->const_index[1] = offset; + if (indirect) + load->src[1] = nir_src_for_ssa(indirect); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for 
(unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + type->offsets[i], + indirect, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, + offset + i * type->stride, + indirect, type->array_element); + } + } + } + + return val; +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + unsigned set = src->var->data.descriptor_set; + + nir_ssa_def *binding = nir_imm_int(&b->nb, src->var->data.binding); + nir_deref *deref = &src->deref; + + /* The block variable may be an array, in which case the array index adds + * an offset to the binding. Figure out that index now. + */ + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + type = type->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + binding = nir_imm_int(&b->nb, src->var->data.binding + + deref_array->base_offset); + } else { + binding = nir_iadd(&b->nb, binding, deref_array->indirect.ssa); + } + } + + unsigned offset = 0; + nir_ssa_def *indirect = NULL; + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + offset += type->stride * deref_array->base_offset; + } else { + nir_ssa_def *offset = nir_imul(&b->nb, deref_array->indirect.ssa, + nir_imm_int(&b->nb, type->stride)); + indirect = indirect ? 
nir_iadd(&b->nb, indirect, offset) : offset; + } + type = type->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + offset += type->offsets[deref_struct->index]; + type = type->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } + + /* TODO SSBO's */ + nir_intrinsic_op op = indirect ? nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; + + return _vtn_block_load(b, op, set, binding, offset, indirect, type); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. + */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) +{ + nir_deref *src_tail = get_deref_tail(src); + + struct vtn_ssa_value *val; + if (src->var->interface_type) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_type, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + 
nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_type, + dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest_type, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest_type, dest, dest_tail, src); + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest, struct vtn_type *type) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child || src->var->interface_type) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, ©->instr); + } +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + nir_variable *var = ralloc(b->shader, 
nir_variable); + + var->type = type->type; + var->name = ralloc_strdup(var, val->name); + + bool builtin_block = false; + if (type->block) { + var->interface_type = type->type; + builtin_block = type->builtin_block; + } else if (glsl_type_is_array(type->type) && + (type->array_element->block || + type->array_element->buffer_block)) { + var->interface_type = type->array_element->type; + builtin_block = type->array_element->builtin_block; + } else { + var->interface_type = NULL; + } + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + var->data.mode = nir_var_uniform; + var->data.read_only = true; + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivateGlobal: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassWorkgroupLocal: + case SpvStorageClassWorkgroupGlobal: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + if (count > 4) { + assert(count == 5); + var->constant_initializer = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + val->deref = nir_deref_var_create(b, var); + val->deref_type = type; + + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + + /* If this was a uniform block, then we're not going to actually use the + 
* variable (we're only going to use it to compute offsets), so don't + * declare it in the shader. + */ + if (var->data.mode == nir_var_uniform && var->interface_type) + break; + + /* Builtin blocks are lowered to individual variables during SPIR-V -> + * NIR, so don't declare them either. + */ + if (builtin_block) + break; + + switch (var->data.mode) { + case nir_var_shader_in: + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case nir_var_shader_out: + exec_list_push_tail(&b->shader->outputs, &var->node); + break; + case nir_var_global: + exec_list_push_tail(&b->shader->globals, &var->node); + break; + case nir_var_local: + exec_list_push_tail(&b->impl->locals, &var->node); + break; + case nir_var_uniform: + exec_list_push_tail(&b->shader->uniforms, &var->node); + break; + case nir_var_system_value: + exec_list_push_tail(&b->shader->system_values, &var->node); + break; + } + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + nir_deref_var *base = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + nir_deref *tail = &val->deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct 
vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = + nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + tail = tail->child; + } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. 
+ */ + if (base->var->interface_type && base->var->data.mode == nir_var_uniform) + val->deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + else + val->deref_type = deref_type; + + + break; + } + + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + + vtn_variable_copy(b, src, dest, type); + break; + } + + case SpvOpLoad: { + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (glsl_get_base_type(src_type->type) == GLSL_TYPE_SAMPLER) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src, src_type); + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest, dest_type); + break; + } + + case SpvOpCopyMemorySized: + case SpvOpArrayLength: + case SpvOpImageTexelPointer: + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + +static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type 
*child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_value(b, index, vtn_value_type_ssa)->ssa->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + nir_deref_var *sampler = vtn_value(b, w[3], vtn_value_type_deref)->deref; + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + texop = 
nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* From now on, the remaining sources are "Optional Image Operands." */ + if (idx < count) { + /* XXX handle these (bias, lod, etc.) */ + assert(0); + } + + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = nir_deref_tail(&sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = nir_deref_as_var(nir_copy_deref(instr, &sampler->deref)); + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + 
nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder *b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. 
+ */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case 
SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3])->def; + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_bany2; break; + case 3: op = nir_op_bany3; break; + case 4: op = nir_op_bany4; break; + } + break; + + case SpvOpAll: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_ball2; break; + case 3: op = nir_op_ball3; break; + case 4: op = nir_op_ball4; break; + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: 
op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. */ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx(&b->nb, src[1]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[1]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[1]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us. 
*/ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(type), val->name); + instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct 
vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatentates a number of vectors/scalars together to produce a vector + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type = 
src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. 
+ */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + } + + return cur; +} + +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite 
operation"); + } +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_phi_node_create(b, type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) +{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for 
(unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + nir_builder_insert_before_block(&b->nb, block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok. */ + break; + + case SpvOpCapability: + /* + * TODO properly handle these and give a real error if asking for too + * much. 
+ */ + assert(w[1] == SpvCapabilityMatrix || + w[1] == SpvCapabilityShader); + break; + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExecutionMode: + /* + * TODO handle these - for Vulkan OriginUpperLeft is always set for + * fragment shaders, so we can ignore this for now + */ + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /* Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + 
vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + b->func->overload = overload; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: + break; /* Does nothing */ + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. 
In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. + */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + + struct exec_node *list_tail = exec_list_get_tail(b->nb.cf_node_list); + nir_cf_node *tail_node = exec_node_data(nir_cf_node, list_tail, node); + assert(tail_node->type == nir_cf_node_block); + block->block = nir_cf_node_as_block(tail_node); + break; + } + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: + vtn_push_value(b, w[2], vtn_value_type_undef); + break; + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + case SpvOpImageTexelPointer: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case 
SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalOr: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalAnd: + case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case 
SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +static void +vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) +{ + struct vtn_block *block = start; + while (block != end_block) { + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. 
*/ + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->nb.cf_node_list, &loop->cf_node); + + struct exec_list *old_list = b->nb.cf_node_list; + + /* Reset the merge_op to prerevent infinite recursion */ + block->merge_op = SpvOpNop; + + nir_builder_insert_after_cf_list(&b->nb, &loop->body); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = new_break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + b->block = block; + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + nir_cf_node *cur_cf_node = + exec_node_data(nir_cf_node, exec_list_get_tail(b->nb.cf_node_list), + node); + nir_block *cur_block = nir_cf_node_as_block(cur_cf_node); + _mesa_hash_table_insert(b->block_table, cur_block, block); + + switch (branch_op) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ + return; + } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next. 
+ */ + block = branch_block; + continue; + } + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + nir_cf_node_insert_end(b->nb.cf_node_list, &if_stmt->cf_node); + + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. 
+ */ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + struct exec_list *old_list = b->nb.cf_node_list; + + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); + + nir_builder_insert_after_cf_list(&b->nb, &if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + nir_builder_insert_after_cf_list(&b->nb, old_list); + block = merge_block; + continue; + } + + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. 
+ */ + continue; + } + + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + + case SpvOpSwitch: + case SpvOpReturnValue: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 4 dwords) */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] == 99); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, options); + + /* Initialize the stn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + nir_builder_init(&b->nb, b->impl); + nir_builder_insert_after_cf_list(&b->nb, &b->impl->body); + vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); 
+ vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + } + + ralloc_free(b); + + return shader; +} diff --git a/src/glsl/nir/spirv_to_nir_private.h b/src/glsl/nir/spirv_to_nir_private.h new file mode 100644 index 00000000000..decceff65a6 --- /dev/null +++ b/src/glsl/nir/spirv_to_nir_private.h @@ -0,0 +1,227 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
/*
 * Discriminator for struct vtn_value: says which kind of SPIR-V result an id
 * refers to.  vtn_value_type_invalid is 0, which is the state of every slot
 * in the zero-allocated values array until vtn_push_value() claims it.
 */
enum vtn_value_type {
   vtn_value_type_invalid = 0,
   vtn_value_type_undef,
   vtn_value_type_string,
   vtn_value_type_decoration_group,
   vtn_value_type_type,
   vtn_value_type_constant,
   vtn_value_type_deref,
   vtn_value_type_function,
   vtn_value_type_block,
   vtn_value_type_ssa,
   vtn_value_type_extension,
};

/* Book-keeping for one SPIR-V basic block. */
struct vtn_block {
   /* Merge opcode if this block contains a merge; SpvOpNop otherwise. */
   SpvOp merge_op;
   /* Id taken from word 1 of the merge instruction; only meaningful when
    * merge_op is not SpvOpNop.
    */
   uint32_t merge_block_id;
   /* Points at the block's OpLabel instruction in the SPIR-V word stream. */
   const uint32_t *label;
   /* Points at the instruction that terminates the block. */
   const uint32_t *branch;
   /* NIR block assigned when the OpLabel is handled in the body pass. */
   nir_block *block;
};

/* Book-keeping for one SPIR-V function. */
struct vtn_function {
   /* Link in vtn_builder::functions. */
   struct exec_node node;

   /* NIR overload created when the OpFunction is seen. */
   nir_function_overload *overload;
   /* First block of the function; NULL until its first OpLabel is seen. */
   struct vtn_block *start_block;

   /* Points at the function's OpFunctionEnd instruction. */
   const uint32_t *end;
};

/*
 * Per-instruction callback.  The handlers in spirv_to_nir.c receive the
 * builder, the opcode, the instruction's words and the word count.
 * NOTE(review): a false return appears to stop the instruction walk (the
 * preamble handler returns false at "End of preamble") -- confirm against
 * vtn_foreach_instruction().
 */
typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
                                        const uint32_t *, unsigned);

/*
 * An SSA value of arbitrary GLSL type.  Vectors and scalars carry a single
 * NIR def; any other type carries glsl_get_length(type) child values.
 */
struct vtn_ssa_value {
   union {
      /* Valid when type is a vector or scalar. */
      nir_ssa_def *def;
      /* Valid otherwise: one entry per element of the composite. */
      struct vtn_ssa_value **elems;
   };

   /* For matrices, a transposed version of the value, or NULL if it hasn't
    * been computed
    */
   struct vtn_ssa_value *transposed;

   const struct glsl_type *type;
};
+ */ + bool builtin_block; + + /* for arrays and matrices, the array stride */ + unsigned stride; + + /* for arrays, the vtn_type for the elements of the array */ + struct vtn_type *array_element; + + /* for structures, the vtn_type for each member */ + struct vtn_type **members; + + /* Whether this type, or a parent type, has been decorated as a builtin */ + bool is_builtin; + + SpvBuiltIn builtin; +}; + +struct vtn_value { + enum vtn_value_type value_type; + const char *name; + struct vtn_decoration *decoration; + union { + void *ptr; + char *str; + struct vtn_type *type; + struct { + nir_constant *constant; + const struct glsl_type *const_type; + }; + struct { + nir_deref_var *deref; + struct vtn_type *deref_type; + }; + struct vtn_function *func; + struct vtn_block *block; + struct vtn_ssa_value *ssa; + vtn_instruction_handler ext_handler; + }; +}; + +struct vtn_decoration { + struct vtn_decoration *next; + int member; /* -1 if not a member decoration */ + const uint32_t *literals; + struct vtn_value *group; + SpvDecoration decoration; +}; + +struct vtn_builder { + nir_builder nb; + + nir_shader *shader; + nir_function_impl *impl; + struct vtn_block *block; + + /* + * In SPIR-V, constants are global, whereas in NIR, the load_const + * instruction we use is per-function. So while we parse each function, we + * keep a hash table of constants we've resolved to nir_ssa_value's so + * far, and we lazily resolve them when we see them used in a function. + */ + struct hash_table *const_table; + + /* + * Map from nir_block to the vtn_block which ends with it -- used for + * handling phi nodes. + */ + struct hash_table *block_table; + + /* + * NIR variable for each SPIR-V builtin. 
+ */ + nir_variable *builtins[42]; /* XXX need symbolic constant from SPIR-V header */ + + unsigned value_id_bound; + struct vtn_value *values; + + SpvExecutionModel execution_model; + struct vtn_value *entry_point; + + struct vtn_function *func; + struct exec_list functions; +}; + +static inline struct vtn_value * +vtn_push_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + assert(value_id < b->value_id_bound); + assert(b->values[value_id].value_type == vtn_value_type_invalid); + + b->values[value_id].value_type = value_type; + + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) +{ + assert(value_id < b->value_id_bound); + return &b->values[value_id]; +} + +static inline struct vtn_value * +vtn_value(struct vtn_builder *b, uint32_t value_id, + enum vtn_value_type value_type) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + assert(val->value_type == value_type); + return val; +} + +struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); + +typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *, + struct vtn_value *, + int member, + const struct vtn_decoration *, + void *); + +void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data); + +bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode, + const uint32_t *words, unsigned count); diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 3c3941645f0..3fef7c48d6f 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -23,6 +23,8 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#include "main/config.h" + #ifndef SHADER_ENUMS_H #define SHADER_ENUMS_H @@ -197,4 +199,214 @@ enum glsl_interp_qualifier }; +/** + * Indexes for vertex program attributes. + * GL_NV_vertex_program aliases generic attributes over the conventional + * attributes. 
In GL_ARB_vertex_program shader the aliasing is optional. + * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the + * generic attributes are distinct/separate). + */ +typedef enum +{ + VERT_ATTRIB_POS = 0, + VERT_ATTRIB_WEIGHT = 1, + VERT_ATTRIB_NORMAL = 2, + VERT_ATTRIB_COLOR0 = 3, + VERT_ATTRIB_COLOR1 = 4, + VERT_ATTRIB_FOG = 5, + VERT_ATTRIB_COLOR_INDEX = 6, + VERT_ATTRIB_EDGEFLAG = 7, + VERT_ATTRIB_TEX0 = 8, + VERT_ATTRIB_TEX1 = 9, + VERT_ATTRIB_TEX2 = 10, + VERT_ATTRIB_TEX3 = 11, + VERT_ATTRIB_TEX4 = 12, + VERT_ATTRIB_TEX5 = 13, + VERT_ATTRIB_TEX6 = 14, + VERT_ATTRIB_TEX7 = 15, + VERT_ATTRIB_POINT_SIZE = 16, + VERT_ATTRIB_GENERIC0 = 17, + VERT_ATTRIB_GENERIC1 = 18, + VERT_ATTRIB_GENERIC2 = 19, + VERT_ATTRIB_GENERIC3 = 20, + VERT_ATTRIB_GENERIC4 = 21, + VERT_ATTRIB_GENERIC5 = 22, + VERT_ATTRIB_GENERIC6 = 23, + VERT_ATTRIB_GENERIC7 = 24, + VERT_ATTRIB_GENERIC8 = 25, + VERT_ATTRIB_GENERIC9 = 26, + VERT_ATTRIB_GENERIC10 = 27, + VERT_ATTRIB_GENERIC11 = 28, + VERT_ATTRIB_GENERIC12 = 29, + VERT_ATTRIB_GENERIC13 = 30, + VERT_ATTRIB_GENERIC14 = 31, + VERT_ATTRIB_GENERIC15 = 32, + VERT_ATTRIB_MAX = 33 +} gl_vert_attrib; + +/** + * Symbolic constants to help iterating over + * specific blocks of vertex attributes. + * + * VERT_ATTRIB_FF + * includes all fixed function attributes as well as + * the aliased GL_NV_vertex_program shader attributes. + * VERT_ATTRIB_TEX + * includes the classic texture coordinate attributes. + * Is a subset of VERT_ATTRIB_FF. + * VERT_ATTRIB_GENERIC + * includes the OpenGL 2.0+ GLSL generic shader attributes. + * These alias the generic GL_ARB_vertex_shader attributes. 
+ */ +#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) +#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 + +#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) +#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS + +#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) +#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS + +/** + * Bitflags for vertex attributes. + * These are used in bitfields in many places. + */ +/*@{*/ +#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) +#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) +#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) +#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) +#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) +#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) +#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) +#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) +#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) +#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) +#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) +#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) +#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) +#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) +#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) +#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) +#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) +#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) + +#define VERT_BIT(i) BITFIELD64_BIT(i) +#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) + +#define VERT_BIT_FF(i) VERT_BIT(i) +#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) +#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i)) +#define VERT_BIT_TEX_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) + +#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) +#define VERT_BIT_GENERIC_ALL \ + BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), 
VERT_ATTRIB_GENERIC_MAX) +/*@}*/ + + +/** + * Indexes for vertex shader outputs, geometry shader inputs/outputs, and + * fragment shader inputs. + * + * Note that some of these values are not available to all pipeline stages. + * + * When this enum is updated, the following code must be updated too: + * - vertResults (in prog_print.c's arb_output_attrib_string()) + * - fragAttribs (in prog_print.c's arb_input_attrib_string()) + * - _mesa_varying_slot_in_fs() + */ +typedef enum +{ + VARYING_SLOT_POS, + VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */ + VARYING_SLOT_COL1, + VARYING_SLOT_FOGC, + VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */ + VARYING_SLOT_TEX1, + VARYING_SLOT_TEX2, + VARYING_SLOT_TEX3, + VARYING_SLOT_TEX4, + VARYING_SLOT_TEX5, + VARYING_SLOT_TEX6, + VARYING_SLOT_TEX7, + VARYING_SLOT_PSIZ, /* Does not appear in FS */ + VARYING_SLOT_BFC0, /* Does not appear in FS */ + VARYING_SLOT_BFC1, /* Does not appear in FS */ + VARYING_SLOT_EDGE, /* Does not appear in FS */ + VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */ + VARYING_SLOT_CLIP_DIST0, + VARYING_SLOT_CLIP_DIST1, + VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */ + VARYING_SLOT_LAYER, /* Appears as VS or GS output */ + VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */ + VARYING_SLOT_FACE, /* FS only */ + VARYING_SLOT_PNTC, /* FS only */ + VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */ + VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */ + VARYING_SLOT_VAR0, /* First generic varying slot */ + VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING, + VARYING_SLOT_PATCH0 = VARYING_SLOT_MAX, + VARYING_SLOT_TESS_MAX = VARYING_SLOT_PATCH0 + MAX_VARYING +} gl_varying_slot; + + +/** + * Bitflags for varying slots. 
+ */ +/*@{*/ +#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS) +#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0) +#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1) +#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC) +#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0) +#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1) +#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2) +#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3) +#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4) +#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5) +#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6) +#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7) +#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U)) +#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \ + MAX_TEXTURE_COORD_UNITS) +#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ) +#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0) +#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1) +#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE) +#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) +#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) +#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1) +#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID) +#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER) +#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) +#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE) +#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC) +#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V)) +/*@}*/ + + +/** + * Fragment program results + */ +typedef enum +{ + FRAG_RESULT_DEPTH = 0, + FRAG_RESULT_STENCIL = 1, + /* If a single color should be written to all render targets, this + * register is written. No FRAG_RESULT_DATAn will be written. 
+ */ + FRAG_RESULT_COLOR = 2, + FRAG_RESULT_SAMPLE_MASK = 3, + + /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n] + * or ARB_fragment_program fragment.color[n]) color results. If + * any are written, FRAG_RESULT_COLOR will not be written. + */ + FRAG_RESULT_DATA0 = 4, + FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +} gl_frag_result; + + #endif /* SHADER_ENUMS_H */ diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index 6033364afc5..6ff9553d6fe 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -35,6 +35,12 @@ #include "util/ralloc.h" #include "util/strtod.h" +extern "C" void +_mesa_error_no_memory(const char *caller) +{ + fprintf(stderr, "Mesa error: out of memory in %s", caller); +} + void _mesa_warning(struct gl_context *ctx, const char *fmt, ...) { diff --git a/src/mesa/drivers/dri/common/dri_test.c b/src/mesa/drivers/dri/common/dri_test.c index 57bfa5b9394..310e7617e2f 100644 --- a/src/mesa/drivers/dri/common/dri_test.c +++ b/src/mesa/drivers/dri/common/dri_test.c @@ -1,3 +1,4 @@ +#include <stdlib.h> #include "main/glheader.h" #include "main/compiler.h" #include "glapi/glapi.h" @@ -33,12 +34,14 @@ _glapi_check_multithread(void) PUBLIC void _glapi_set_context(void *context) -{} +{ + _glapi_Context = context; +} PUBLIC void * _glapi_get_context(void) { - return 0; + return _glapi_Context; } PUBLIC void @@ -84,7 +87,7 @@ _glapi_set_nop_handler(_glapi_nop_handler_proc func) PUBLIC struct _glapi_table * _glapi_new_nop_table(unsigned num_entries) { - return NULL; + return malloc(16); } #ifndef NO_MAIN diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 566f2ddd98f..3c8197226fe 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -41,7 +41,7 @@ AM_CFLAGS = \ AM_CXXFLAGS = $(AM_CFLAGS) -noinst_LTLIBRARIES = libi965_dri.la +noinst_LTLIBRARIES = libi965_dri.la 
libi965_compiler.la libi965_dri_la_SOURCES = $(i965_FILES) libi965_dri_la_LIBADD = $(INTEL_LIBS) @@ -55,6 +55,16 @@ TEST_LIBS = \ $(CLOCK_LIB) \ ../common/libdri_test_stubs.la +libi965_compiler_la_SOURCES = $(i965_FILES) +libi965_compiler_la_LIBADD = $(INTEL_LIBS) \ + ../common/libdricommon.la \ + ../common/libxmlconfig.la \ + ../common/libmegadriver_stub.la \ + ../../../libmesa.la \ + $(DRI_LIB_DEPS) \ + $(CLOCK_LIB) \ + ../common/libdri_test_stubs.la -lm + TESTS = \ test_fs_cmod_propagation \ test_fs_saturate_propagation \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index f428f58c69a..328662da82e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -311,7 +311,7 @@ brw_init_driver_functions(struct brw_context *brw, functions->GetSamplePosition = gen6_get_sample_position; } -static void +void brw_initialize_context_constants(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -390,7 +390,8 @@ brw_initialize_context_constants(struct brw_context *brw) int max_samples; const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); const int clamp_max_samples = - driQueryOptioni(&brw->optionCache, "clamp_max_samples"); + brw->optionCache.info != NULL ? + driQueryOptioni(&brw->optionCache, "clamp_max_samples") : -1; if (clamp_max_samples < 0) { max_samples = msaa_modes[0]; @@ -814,6 +815,7 @@ brwCreateContext(gl_api api, intel_batchbuffer_init(brw); +#if 0 if (brw->gen >= 6) { /* Create a new hardware context. 
Using a hardware context means that * our GPU state will be saved/restored on context switch, allowing us @@ -838,6 +840,7 @@ brwCreateContext(gl_api api, } brw_init_state(brw); +#endif intelInitExtensions(ctx); @@ -905,8 +908,10 @@ brwCreateContext(gl_api api, _mesa_compute_version(ctx); +#if 0 _mesa_initialize_dispatch_tables(ctx); _mesa_initialize_vbo_vtxfmt(ctx); +#endif if (ctx->Extensions.AMD_performance_monitor) { brw_init_performance_monitors(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b52bca77460..1267a6f5a97 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -361,6 +361,12 @@ struct brw_stage_prog_data { /** @} */ } binding_table; + uint32_t *map_entries; + struct { + uint32_t index_count; + uint32_t *index; + } bind_map[8]; /* MAX_SETS from vulkan/private.h */ + GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; unsigned nr_image_params; @@ -2069,6 +2075,15 @@ gen6_upload_push_constants(struct brw_context *brw, struct brw_stage_state *stage_state, enum aub_state_struct_type type); +struct intel_screen *intel_screen_create(int fd); +void intel_screen_destroy(struct intel_screen *screen); + +struct brw_context *intel_context_create(struct intel_screen *screen); +void intel_context_destroy(struct brw_context *brw); + +void +brw_initialize_context_constants(struct brw_context *brw); + bool gen9_use_linear_1d_layout(const struct brw_context *brw, const struct intel_mipmap_tree *mt); diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index cd7e0942277..6ce5779137e 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -55,7 +55,7 @@ brw_cs_prog_data_compare(const void *in_a, const void *in_b) } -static const unsigned * +const unsigned * brw_cs_emit(struct brw_context *brw, void *mem_ctx, const struct brw_cs_prog_key *key, diff --git 
a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 8404aa3e824..b83d49a0635 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -41,6 +41,15 @@ bool brw_cs_prog_data_compare(const void *a, const void *b); void brw_upload_cs_prog(struct brw_context *brw); +const unsigned * +brw_cs_emit(struct brw_context *brw, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + struct gl_compute_program *cp, + struct gl_shader_program *prog, + unsigned *final_assembly_size); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 82a36357de9..3bbaf977bc5 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -57,6 +57,7 @@ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) +#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */ #define _3DPRIM_POINTLIST 0x01 #define _3DPRIM_LINELIST 0x02 #define _3DPRIM_LINESTRIP 0x03 @@ -78,6 +79,7 @@ #define _3DPRIM_LINESTRIP_BF 0x13 #define _3DPRIM_LINESTRIP_CONT_BF 0x14 #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 +#endif /* We use this offset to be able to pass native primitive types in struct * _mesa_prim::mode. 
Native primitive types are BRW_PRIM_OFFSET + diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 7ad3a2fb7b4..16c125d07ee 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -373,3 +373,15 @@ brw_get_device_info(int devid, int revision) return devinfo; } + +const char * +brw_get_device_name(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) case id: return name; +#include "pci_ids/i965_pci_ids.h" + default: + return NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 2a73e937d9f..7bab5716b43 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -87,3 +87,4 @@ struct brw_device_info }; const struct brw_device_info *brw_get_device_info(int devid, int revision); +const char *brw_get_device_name(int devid); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 82cb499a326..0e091ddc227 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -489,6 +489,7 @@ fs_visitor::type_size(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -1525,6 +1526,10 @@ fs_visitor::assign_vs_urb_setup() unsigned vue_entries = MAX2(count, vs_prog_data->base.vue_map.num_slots); + /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS + * command). Each attribute is 16 bytes (4 floats/dwords), so each unit + * fits four attributes. 
+ */ vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; vs_prog_data->base.urb_read_length = (count + 1) / 2; @@ -2660,9 +2665,22 @@ fs_visitor::emit_repclear_shader() brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; int base_mrf = 1; int color_mrf = base_mrf + 2; + fs_inst *mov; + + if (uniforms == 1) { + mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + } else { + struct brw_reg reg = + brw_reg(BRW_GENERAL_REGISTER_FILE, + 2, 3, 0, 0, BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), + fs_reg(reg)); + } fs_inst *write; if (key->nr_color_regions == 1) { @@ -2691,8 +2709,10 @@ fs_visitor::emit_repclear_shader() assign_curb_setup(); /* Now that we have the uniform assigned, go ahead and force it to a vec4. 
*/ - assert(mov->src[0].file == HW_REG); - mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0); + if (uniforms == 1) { + assert(mov->src[0].file == HW_REG); + mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0); + } } /** @@ -4971,7 +4991,8 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) { assert(stage == MESA_SHADER_VERTEX); - assign_common_binding_table_offsets(0); + if (prog_data->map_entries == NULL) + assign_common_binding_table_offsets(0); setup_vs_payload(); if (shader_time_index >= 0) @@ -5010,9 +5031,8 @@ fs_visitor::run_fs(bool do_rep_send) assert(stage == MESA_SHADER_FRAGMENT); - sanity_param_count = prog->Parameters->NumParameters; - - assign_binding_table_offsets(); + if (prog_data->map_entries == NULL) + assign_binding_table_offsets(); if (devinfo->gen >= 6) setup_payload_gen6(); @@ -5082,13 +5102,6 @@ fs_visitor::run_fs(bool do_rep_send) else wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used); - /* If any state parameters were appended, then ParameterValues could have - * been realloced, in which case the driver uniform storage set up by - * _mesa_associate_uniform_storage() would point to freed memory. Make - * sure that didn't happen. 
- */ - assert(sanity_param_count == prog->Parameters->NumParameters); - return !failed; } @@ -5161,7 +5174,7 @@ brw_wm_fs_emit(struct brw_context *brw, if (prog) shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (unlikely(INTEL_DEBUG & DEBUG_WM)) + if (unlikely(INTEL_DEBUG & DEBUG_WM) && shader->base.ir) brw_dump_ir("fragment", prog, &shader->base, &fp->Base); int st_index8 = -1, st_index16 = -1; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ce4153df70e..93a36cc03bf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1493,13 +1493,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr has_indirect = true; /* fallthrough */ case nir_intrinsic_load_ubo: { + uint32_t set = instr->const_index[0]; nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; if (const_index) { - surf_index = fs_reg(stage_prog_data->binding_table.ubo_start + - const_index->u[0]); + uint32_t binding = const_index->u[0]; + + /* FIXME: We should probably assert here, but dota2 seems to hit + * it and we'd like to keep going. + */ + if (binding >= stage_prog_data->bind_map[set].index_count) + binding = 0; + + surf_index = fs_reg(stage_prog_data->bind_map[set].index[binding]); } else { + assert(0 && "need more info from the ir for this."); /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value * from any live channel. 
@@ -1524,7 +1533,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_D), fs_reg(2)); - unsigned vec4_offset = instr->const_index[0] / 4; + unsigned vec4_offset = instr->const_index[1] / 4; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, base_offset, vec4_offset + i); @@ -1532,7 +1541,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15); + fs_reg const_offset_reg((unsigned) instr->const_index[1] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); @@ -1715,7 +1724,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { - unsigned sampler = instr->sampler_index; + uint32_t set = instr->sampler_set; + uint32_t binding = instr->sampler_index; + + assert(binding < stage_prog_data->bind_map[set].index_count); + assert(stage_prog_data->bind_map[set].index[binding] < 1000); + + unsigned sampler = stage_prog_data->bind_map[set].index[binding]; fs_reg sampler_reg(sampler); /* FINISHME: We're failing to recompile our programs when the sampler is @@ -1875,6 +1890,12 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr) bld.emit(BRW_OPCODE_CONTINUE); break; case nir_jump_return: + /* This has to be the last block in the shader. We don't handle + * early returns. 
+ */ + assert(nir_cf_node_next(&instr->instr.block->cf_node) == NULL && + instr->instr.block->cf_node.parent->type == nir_cf_node_function); + break; default: unreachable("unknown jump"); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 5c0d9230162..4ad65215756 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -33,19 +33,23 @@ #include "brw_state.h" #include "brw_ff_gs.h" - bool -brw_codegen_gs_prog(struct brw_context *brw, +brw_compile_gs_prog(struct brw_context *brw, struct gl_shader_program *prog, struct brw_geometry_program *gp, - struct brw_gs_prog_key *key) + struct brw_gs_prog_key *key, + struct brw_gs_compile_output *output) { - struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_compile c; memset(&c, 0, sizeof(c)); c.key = *key; c.gp = gp; + /* We get the bind map as input in the output struct...*/ + c.prog_data.base.base.map_entries = output->prog_data.base.base.map_entries; + memcpy(c.prog_data.base.base.bind_map, output->prog_data.base.base.bind_map, + sizeof(c.prog_data.base.base.bind_map)); + c.prog_data.include_primitive_id = (gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0; @@ -270,19 +274,39 @@ brw_codegen_gs_prog(struct brw_context *brw, return false; } - /* Scratch space is used for register spilling */ - if (c.prog_data.base.base.total_scratch) { + output->mem_ctx = mem_ctx; + output->program = program; + output->program_size = program_size; + memcpy(&output->prog_data, &c.prog_data, + sizeof(output->prog_data)); + + return true; +} + +bool +brw_codegen_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct brw_gs_compile_output output = { 0 }; /* bind map is read as input */ + struct brw_stage_state *stage_state = &brw->gs.base; + + if (!brw_compile_gs_prog(brw, prog, gp, key, &output)) /* true == success */ + return false; + + if (output.prog_data.base.base.total_scratch) { brw_get_scratch_bo(brw, 
&stage_state->scratch_bo, - c.prog_data.base.base.total_scratch * + output.prog_data.base.base.total_scratch * brw->max_gs_threads); } brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG, - &c.key, sizeof(c.key), - program, program_size, - &c.prog_data, sizeof(c.prog_data), + key, sizeof(*key), + output.program, output.program_size, + &output.prog_data, sizeof(output.prog_data), &stage_state->prog_offset, &brw->gs.prog_data); - ralloc_free(mem_ctx); + ralloc_free(output.mem_ctx); return true; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index b327c40f140..573bbdb16f8 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -37,6 +37,22 @@ struct gl_context; struct gl_shader_program; struct gl_program; +struct brw_gs_compile_output { + void *mem_ctx; + const void *program; + uint32_t program_size; + struct brw_gs_prog_data prog_data; +}; + +struct brw_gs_prog_key; + +bool +brw_compile_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, + struct brw_gs_compile_output *output); + bool brw_gs_prog_data_compare(const void *a, const void *b); void diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index b5788fa2e33..79e31d86759 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -76,7 +76,6 @@ brw_create_nir(struct brw_context *brw, const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[stage].NirOptions; struct gl_shader *shader = shader_prog ? 
shader_prog->_LinkedShaders[stage] : NULL; - bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); nir_shader *nir; /* First, lower the GLSL IR or Mesa IR to NIR */ @@ -88,6 +87,27 @@ brw_create_nir(struct brw_context *brw, } nir_validate_shader(nir); + brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar); + + static GLuint msg_id = 0; + _mesa_gl_debug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, + "%s NIR shader:\n", + _mesa_shader_stage_to_abbrev(stage)); + + return nir; +} + +void +brw_process_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + gl_shader_stage stage, bool is_scalar) +{ + bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); + nir_lower_global_vars_to_local(nir); nir_validate_shader(nir); @@ -134,8 +154,10 @@ brw_create_nir(struct brw_context *brw, if (shader_prog) { nir_lower_samplers(nir, shader_prog, stage); - nir_validate_shader(nir); + } else { + nir_lower_samplers_for_vk(nir); } + nir_validate_shader(nir); nir_lower_system_values(nir); nir_validate_shader(nir); @@ -145,7 +167,7 @@ brw_create_nir(struct brw_context *brw, nir_optimize(nir, is_scalar); - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ nir_opt_peephole_ffma(nir); nir_validate_shader(nir); @@ -189,7 +211,7 @@ brw_create_nir(struct brw_context *brw, * run it last because it stashes data in instr->pass_flags and we don't * want that to be squashed by other NIR passes. 
*/ - if (brw->gen <= 5) + if (devinfo->gen <= 5) brw_nir_analyze_boolean_resolves(nir); nir_sweep(nir); @@ -199,8 +221,6 @@ brw_create_nir(struct brw_context *brw, _mesa_shader_stage_to_string(stage)); nir_print_shader(nir, stderr); } - - return nir; } enum brw_reg_type diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index ad712930536..5a1358890cc 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -85,6 +85,12 @@ enum brw_reg_type brw_type_for_nir_type(nir_alu_type type); enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type); +void +brw_process_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + gl_shader_stage stage, bool is_scalar); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 4f380184464..5a54cd39076 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -276,7 +276,7 @@ brw_get_scratch_bo(struct brw_context *brw, void brwInitFragProgFuncs( struct dd_function_table *functions ) { - assert(functions->ProgramStringNotify == _tnl_program_string); + /* assert(functions->ProgramStringNotify == _tnl_program_string); */ functions->NewProgram = brwNewProgram; functions->DeleteProgram = brwDeleteProgram; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 6b928060ef0..67b8dde7cc8 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -459,6 +459,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 9e00c837407..5effb4c8829 100644 --- 
a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -416,6 +416,9 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) DBG("%s\n", __func__); + if (cache->bo == NULL) + return; + if (brw->has_llc) drm_intel_bo_unmap(cache->bo); drm_intel_bo_unreference(cache->bo); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f18915a8e38..63f75da7e99 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1916,7 +1916,7 @@ brw_vs_emit(struct brw_context *brw, if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS); - if (unlikely(INTEL_DEBUG & DEBUG_VS)) + if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir) brw_dump_ir("vertex", prog, &shader->base, &vp->Base); if (!vp->Base.nir && diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 9062bcc444f..20b628e9192 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -643,6 +643,7 @@ vec4_visitor::type_size(const struct glsl_type *type) case GLSL_TYPE_DOUBLE: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: unreachable("not reached"); } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 6ee92848172..41266f57560 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -41,7 +41,7 @@ * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. 
*/ -static unsigned +unsigned brw_compute_barycentric_interp_modes(struct brw_context *brw, bool shade_model_flat, bool persample_shading, diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 0a8a97b2f5e..73a741f89e4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -90,6 +90,12 @@ bool brw_wm_prog_data_compare(const void *a, const void *b); void brw_upload_wm_prog(struct brw_context *brw); +unsigned +brw_compute_barycentric_interp_modes(struct brw_context *brw, + bool shade_model_flat, + bool persample_shading, + const struct gl_fragment_program *fprog); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index a0777310e2a..58f41bfd55d 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -60,7 +60,7 @@ static const struct dri_debug_control debug_control[] = { { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, - { "aub", DEBUG_AUB }, + { "foob", DEBUG_AUB }, /* disable aub dumbing in the dri driver */ { "shader_time", DEBUG_SHADER_TIME }, { "no16", DEBUG_NO16 }, { "blorp", DEBUG_BLORP }, diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 0da528b22a1..3bc28a12026 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -275,14 +275,17 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130; ctx->Extensions.EXT_timer_query = true; - if (brw->gen == 5 || can_write_oacontrol(brw)) { - ctx->Extensions.AMD_performance_monitor = true; - ctx->Extensions.INTEL_performance_query = true; + if (brw->bufmgr) { + if (brw->gen == 5 || can_write_oacontrol(brw)) { + ctx->Extensions.AMD_performance_monitor = true; + ctx->Extensions.INTEL_performance_query = true; + } } } if 
(brw->gen >= 6) { ctx->Extensions.ARB_blend_func_extended = + brw->optionCache.info == NULL || !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended"); ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_draw_buffers_blend = true; @@ -305,7 +308,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.EXT_transform_feedback = true; ctx->Extensions.OES_depth_texture_cube_map = true; - ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp; + /* Test if the kernel has the ioctl. */ + if (brw->intelScreen->hw_has_timestamp) + ctx->Extensions.ARB_timer_query = true; /* Only enable this in core profile because other parts of Mesa behave * slightly differently when the extension is enabled. @@ -329,7 +334,8 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; - if (can_do_pipelined_register_writes(brw)) { + if (brw->bufmgr && + can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_draw_indirect = true; ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; @@ -358,7 +364,9 @@ intelInitExtensions(struct gl_context *ctx) if (ctx->API != API_OPENGL_CORE) ctx->Extensions.ARB_color_buffer_float = true; - if (ctx->Mesa_DXTn || driQueryOptionb(&brw->optionCache, "force_s3tc_enable")) + if (ctx->Mesa_DXTn || + (brw->optionCache.info != NULL && + driQueryOptionb(&brw->optionCache, "force_s3tc_enable"))) ctx->Extensions.EXT_texture_compression_s3tc = true; ctx->Extensions.ANGLE_texture_compression_dxt = true; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 147fa1ea49e..a164c6985dc 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1486,6 +1486,78 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return (const __DRIconfig**) intel_screen_make_configs(psp); } +struct intel_screen * 
+intel_screen_create(int fd) +{ + __DRIscreen *psp; + __DRIconfig **configs; + int i; + + psp = malloc(sizeof(*psp)); + if (psp == NULL) + return NULL; + + psp->image.loader = (void *) 1; /* Don't complain about this being NULL */ + psp->fd = fd; + psp->dri2.useInvalidate = (void *) 1; + + configs = (__DRIconfig **) intelInitScreen2(psp); + for (i = 0; configs[i]; i++) + free(configs[i]); + free(configs); + + return psp->driverPrivate; +} + +void +intel_screen_destroy(struct intel_screen *screen) +{ + __DRIscreen *psp; + + psp = screen->driScrnPriv; + intelDestroyScreen(screen->driScrnPriv); + free(psp); +} + + +struct brw_context * +intel_context_create(struct intel_screen *screen) +{ + __DRIcontext *driContextPriv; + struct brw_context *brw; + unsigned error; + + driContextPriv = malloc(sizeof(*driContextPriv)); + if (driContextPriv == NULL) + return NULL; + + driContextPriv->driScreenPriv = screen->driScrnPriv; + + brwCreateContext(API_OPENGL_CORE, + NULL, /* visual */ + driContextPriv, + 3, 0, + 0, /* flags */ + false, /* notify_reset */ + &error, + NULL); + + brw = driContextPriv->driverPrivate; + brw->ctx.FirstTimeCurrent = false; + + return driContextPriv->driverPrivate; +} + +void +intel_context_destroy(struct brw_context *brw) +{ + __DRIcontext *driContextPriv; + + driContextPriv = brw->driContext; + intelDestroyContext(driContextPriv); + free(driContextPriv); +} + struct intel_buffer { __DRIbuffer base; drm_intel_bo *bo; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 4e00fb6645d..83f3717754d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -97,195 +97,6 @@ struct vbo_context; /** - * Indexes for vertex program attributes. - * GL_NV_vertex_program aliases generic attributes over the conventional - * attributes. In GL_ARB_vertex_program shader the aliasing is optional. - * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the - * generic attributes are distinct/separate). 
- */ -typedef enum -{ - VERT_ATTRIB_POS = 0, - VERT_ATTRIB_WEIGHT = 1, - VERT_ATTRIB_NORMAL = 2, - VERT_ATTRIB_COLOR0 = 3, - VERT_ATTRIB_COLOR1 = 4, - VERT_ATTRIB_FOG = 5, - VERT_ATTRIB_COLOR_INDEX = 6, - VERT_ATTRIB_EDGEFLAG = 7, - VERT_ATTRIB_TEX0 = 8, - VERT_ATTRIB_TEX1 = 9, - VERT_ATTRIB_TEX2 = 10, - VERT_ATTRIB_TEX3 = 11, - VERT_ATTRIB_TEX4 = 12, - VERT_ATTRIB_TEX5 = 13, - VERT_ATTRIB_TEX6 = 14, - VERT_ATTRIB_TEX7 = 15, - VERT_ATTRIB_POINT_SIZE = 16, - VERT_ATTRIB_GENERIC0 = 17, - VERT_ATTRIB_GENERIC1 = 18, - VERT_ATTRIB_GENERIC2 = 19, - VERT_ATTRIB_GENERIC3 = 20, - VERT_ATTRIB_GENERIC4 = 21, - VERT_ATTRIB_GENERIC5 = 22, - VERT_ATTRIB_GENERIC6 = 23, - VERT_ATTRIB_GENERIC7 = 24, - VERT_ATTRIB_GENERIC8 = 25, - VERT_ATTRIB_GENERIC9 = 26, - VERT_ATTRIB_GENERIC10 = 27, - VERT_ATTRIB_GENERIC11 = 28, - VERT_ATTRIB_GENERIC12 = 29, - VERT_ATTRIB_GENERIC13 = 30, - VERT_ATTRIB_GENERIC14 = 31, - VERT_ATTRIB_GENERIC15 = 32, - VERT_ATTRIB_MAX = 33 -} gl_vert_attrib; - -/** - * Symbolic constats to help iterating over - * specific blocks of vertex attributes. - * - * VERT_ATTRIB_FF - * includes all fixed function attributes as well as - * the aliased GL_NV_vertex_program shader attributes. - * VERT_ATTRIB_TEX - * include the classic texture coordinate attributes. - * Is a subset of VERT_ATTRIB_FF. - * VERT_ATTRIB_GENERIC - * include the OpenGL 2.0+ GLSL generic shader attributes. - * These alias the generic GL_ARB_vertex_shader attributes. - */ -#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i)) -#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0 - -#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i)) -#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS - -#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i)) -#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS - -/** - * Bitflags for vertex attributes. - * These are used in bitfields in many places. 
- */ -/*@{*/ -#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS) -#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT) -#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL) -#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0) -#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1) -#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG) -#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX) -#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG) -#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0) -#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1) -#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2) -#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3) -#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4) -#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5) -#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6) -#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7) -#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE) -#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0) - -#define VERT_BIT(i) BITFIELD64_BIT(i) -#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX) - -#define VERT_BIT_FF(i) VERT_BIT(i) -#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX) -#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i)) -#define VERT_BIT_TEX_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX) - -#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i)) -#define VERT_BIT_GENERIC_ALL \ - BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX) -/*@}*/ - - -/** - * Indexes for vertex shader outputs, geometry shader inputs/outputs, and - * fragment shader inputs. - * - * Note that some of these values are not available to all pipeline stages. 
- * - * When this enum is updated, the following code must be updated too: - * - vertResults (in prog_print.c's arb_output_attrib_string()) - * - fragAttribs (in prog_print.c's arb_input_attrib_string()) - * - _mesa_varying_slot_in_fs() - */ -typedef enum -{ - VARYING_SLOT_POS, - VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */ - VARYING_SLOT_COL1, - VARYING_SLOT_FOGC, - VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */ - VARYING_SLOT_TEX1, - VARYING_SLOT_TEX2, - VARYING_SLOT_TEX3, - VARYING_SLOT_TEX4, - VARYING_SLOT_TEX5, - VARYING_SLOT_TEX6, - VARYING_SLOT_TEX7, - VARYING_SLOT_PSIZ, /* Does not appear in FS */ - VARYING_SLOT_BFC0, /* Does not appear in FS */ - VARYING_SLOT_BFC1, /* Does not appear in FS */ - VARYING_SLOT_EDGE, /* Does not appear in FS */ - VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */ - VARYING_SLOT_CLIP_DIST0, - VARYING_SLOT_CLIP_DIST1, - VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */ - VARYING_SLOT_LAYER, /* Appears as VS or GS output */ - VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */ - VARYING_SLOT_FACE, /* FS only */ - VARYING_SLOT_PNTC, /* FS only */ - VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */ - VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */ - VARYING_SLOT_VAR0, /* First generic varying slot */ - VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING, - VARYING_SLOT_PATCH0 = VARYING_SLOT_MAX, - VARYING_SLOT_TESS_MAX = VARYING_SLOT_PATCH0 + MAX_VARYING -} gl_varying_slot; - - -/** - * Bitflags for varying slots. 
- */ -/*@{*/ -#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS) -#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0) -#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1) -#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC) -#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0) -#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1) -#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2) -#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3) -#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4) -#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5) -#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6) -#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7) -#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U)) -#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \ - MAX_TEXTURE_COORD_UNITS) -#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ) -#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0) -#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1) -#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE) -#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) -#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) -#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1) -#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID) -#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER) -#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT) -#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE) -#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC) -#define VARYING_BIT_TESS_LEVEL_OUTER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER) -#define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER) -#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V)) -/*@}*/ - -/** * Determine if the given gl_varying_slot appears in the fragment 
shader. */ static inline GLboolean @@ -308,28 +119,6 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot) /** - * Fragment program results - */ -typedef enum -{ - FRAG_RESULT_DEPTH = 0, - FRAG_RESULT_STENCIL = 1, - /* If a single color should be written to all render targets, this - * register is written. No FRAG_RESULT_DATAn will be written. - */ - FRAG_RESULT_COLOR = 2, - FRAG_RESULT_SAMPLE_MASK = 3, - - /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n] - * or ARB_fragment_program fragment.color[n]) color results. If - * any are written, FRAG_RESULT_COLOR will not be written. - */ - FRAG_RESULT_DATA0 = 4, - FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) -} gl_frag_result; - - -/** * Indexes for all renderbuffers */ typedef enum @@ -2683,6 +2472,11 @@ struct gl_uniform_block GLuint Binding; /** + * Vulkan descriptor set qualifier for this block. + */ + GLuint Set; + + /** * Minimum size (in bytes) of a buffer object to back this uniform buffer * (GL_UNIFORM_BLOCK_DATA_SIZE). 
*/ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8f58f3edf98..b8b082e2a59 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -543,6 +543,7 @@ type_size(const struct glsl_type *type) case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -2463,6 +2464,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, case GLSL_TYPE_STRUCT: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_FUNCTION: assert(!"Should not get here."); break; } diff --git a/src/util/list.h b/src/util/list.h index b98ce59ff77..d4b485174fc 100644 --- a/src/util/list.h +++ b/src/util/list.h @@ -108,6 +108,28 @@ static inline unsigned list_length(struct list_head *list) return length; } +static inline void list_splice(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->next->prev = dst; + src->prev->next = dst->next; + dst->next->prev = src->prev; + dst->next = src->next; +} + +static inline void list_splicetail(struct list_head *src, struct list_head *dst) +{ + if (list_empty(src)) + return; + + src->prev->next = dst; + src->next->prev = dst->prev; + dst->prev->next = src->next; + dst->prev = src->prev; +} + static inline void list_validate(struct list_head *list) { struct list_head *node; diff --git a/src/vulkan/.gitignore b/src/vulkan/.gitignore new file mode 100644 index 00000000000..30c614497e5 --- /dev/null +++ b/src/vulkan/.gitignore @@ -0,0 +1,4 @@ +# Generated source files +/*_spirv.h +/anv_entrypoints.c +/anv_entrypoints.h diff --git a/src/vulkan/Makefile.am b/src/vulkan/Makefile.am new file mode 100644 index 00000000000..9b15871eb43 --- /dev/null +++ b/src/vulkan/Makefile.am @@ -0,0 +1,111 @@ +# Copyright © 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the 
"Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +SUBDIRS = . tests + +vulkan_includedir = $(includedir)/vulkan + +vulkan_include_HEADERS = \ + $(top_srcdir)/include/vulkan/vk_platform.h \ + $(top_srcdir)/include/vulkan/vulkan.h \ + $(top_srcdir)/include/vulkan/vulkan_intel.h \ + $(top_srcdir)/include/vulkan/vk_wsi_lunarg.h + +lib_LTLIBRARIES = libvulkan.la + +check_LTLIBRARIES = libvulkan-test.la + +# The gallium includes are for the util/u_math.h include from main/macros.h + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_builddir)/src/vulkan + +libvulkan_la_CFLAGS = \ + -Wall -Wextra -Wno-unused-parameter -fvisibility=hidden -O0 -g \ + -Wstrict-prototypes -Wmissing-prototypes -Wno-override-init + +libvulkan_la_CXXFLAGS = \ + -Wall -Wextra 
-Wno-unused-parameter -fvisibility=hidden -O0 -g + +VULKAN_SOURCES = \ + anv_allocator.c \ + anv_aub.c \ + anv_cmd_buffer.c \ + anv_batch_chain.c \ + anv_compiler.cpp \ + anv_device.c \ + anv_entrypoints.c \ + anv_entrypoints.h \ + anv_formats.c \ + anv_image.c \ + anv_intel.c \ + anv_meta.c \ + anv_pipeline.c \ + anv_private.h \ + anv_query.c \ + anv_util.c \ + anv_x11.c + +libvulkan_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem.c + +BUILT_SOURCES = \ + anv_entrypoints.h \ + anv_entrypoints.c \ + anv_meta_spirv.h + +anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< header > $@ + +anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS) + $(AM_V_GEN)cat $(vulkan_include_HEADERS) | $(PYTHON2) $< code > $@ + +%_spirv.h: %.c glsl_scraper.py + $(AM_V_GEN) $(PYTHON2) $(srcdir)/glsl_scraper.py --glsl-only -o $@ $< + +CLEANFILES = $(BUILT_SOURCES) + +libvulkan_la_LIBADD = -lxcb -lxcb-dri3 \ + $(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la + +# Libvulkan with dummy gem. Used for unit tests. 
+ +libvulkan_test_la_SOURCES = \ + $(VULKAN_SOURCES) \ + anv_gem_stubs.c + +libvulkan_test_la_CFLAGS = $(libvulkan_la_CFLAGS) +libvulkan_test_la_CXXFLAGS = $(libvulkan_la_CXXFLAGS) +libvulkan_test_la_LIBADD = $(libvulkan_la_LIBADD) + +include $(top_srcdir)/install-lib-links.mk diff --git a/src/vulkan/anv_allocator.c b/src/vulkan/anv_allocator.c new file mode 100644 index 00000000000..121ce039250 --- /dev/null +++ b/src/vulkan/anv_allocator.c @@ -0,0 +1,660 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define _DEFAULT_SOURCE + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <values.h> +#include <assert.h> +#include <linux/futex.h> +#include <linux/memfd.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +#ifdef HAVE_VALGRIND +#define VG_NOACCESS_READ(__ptr) ({ \ + VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \ + __typeof(*(__ptr)) __val = *(__ptr); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\ + __val; \ +}) +#define VG_NOACCESS_WRITE(__ptr, __val) ({ \ + VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \ + *(__ptr) = (__val); \ + VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \ +}) +#else +#define VG_NOACCESS_READ(__ptr) (*(__ptr)) +#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val)) +#endif + +/* Design goals: + * + * - Lock free (except when resizing underlying bos) + * + * - Constant time allocation with typically only one atomic + * + * - Multiple allocation sizes without fragmentation + * + * - Can grow while keeping addresses and offset of contents stable + * + * - All allocations within one bo so we can point one of the + * STATE_BASE_ADDRESS pointers at it. + * + * The overall design is a two-level allocator: top level is a fixed size, big + * block (8k) allocator, which operates out of a bo. Allocation is done by + * either pulling a block from the free list or growing the used range of the + * bo. Growing the range may run out of space in the bo which we then need to + * grow. Growing the bo is tricky in a multi-threaded, lockless environment: + * we need to keep all pointers and contents in the old map valid. GEM bos in + * general can't grow, but we use a trick: we create a memfd and use ftruncate + * to grow it as necessary. We mmap the new size and then create a gem bo for + * it using the new gem userptr ioctl. 
Without heavy-handed locking around + * our allocation fast-path, there isn't really a way to munmap the old mmap, + * so we just keep it around until garbage collection time. While the block + * allocator is lockless for normal operations, we block other threads trying + * to allocate while we're growing the map. It shouldn't happen often, and + * growing is fast anyway. + * + * At the next level we can use various sub-allocators. The state pool is a + * pool of smaller, fixed size objects, which operates much like the block + * pool. It uses a free list for freeing objects, but when it runs out of + * space it just allocates a new block from the block pool. This allocator is + * intended for longer lived state objects such as SURFACE_STATE and most + * other persistent state objects in the API. We may need to track more info + * with these objects and a pointer back to the CPU object (eg VkImage). In + * those cases we just allocate a slightly bigger object and put the extra + * state after the GPU state object. + * + * The state stream allocator works similarly to how the i965 DRI driver streams + * all its state. Even with Vulkan, we need to emit transient state (whether + * surface state base or dynamic state base), and for that we can just get a + * block and fill it up. These cases are local to a command buffer and the + * sub-allocator need not be thread safe. The streaming allocator gets a new + * block when it runs out of space and chains them together so they can be + * easily freed. + */ + +/* Allocations are always at least 64 byte aligned, so 1 is an invalid value. + * We use it to indicate the free list is empty.
*/ +#define EMPTY 1 + +struct anv_mmap_cleanup { + void *map; + size_t size; + uint32_t gem_handle; +}; + +#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0}) + +static inline long +sys_futex(void *addr1, int op, int val1, + struct timespec *timeout, void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static inline int +futex_wake(uint32_t *addr, int count) +{ + return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0); +} + +static inline int +futex_wait(uint32_t *addr, int32_t value) +{ + return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); +} + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +static inline uint32_t +ilog2_round_up(uint32_t value) +{ + assert(value != 0); + return 32 - __builtin_clz(value - 1); +} + +static inline uint32_t +round_to_power_of_two(uint32_t value) +{ + return 1 << ilog2_round_up(value); +} + +static bool +anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + /* We have to add a memory barrier here so that the list head (and + * offset) gets read before we read the map pointer. This way we + * know that the map pointer is valid for the given offset at the + * point where we read it. 
+ */ + __sync_synchronize(); + + uint32_t *next_ptr = *map + current.offset; + new.offset = VG_NOACCESS_READ(next_ptr); + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + *offset = current.offset; + return true; + } + current = old; + } + + return false; +} + +static void +anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset) +{ + union anv_free_list current, old, new; + uint32_t *next_ptr = map + offset; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, current.offset); + new.offset = offset; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + } while (old.u64 != current.u64); +} + +/* All pointers in the ptr_free_list are assumed to be page-aligned. This + * means that the bottom 12 bits should all be zero. + */ +#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) +#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff)) +#define PFL_PACK(ptr, count) ({ \ + assert(((uintptr_t)(ptr) & 0xfff) == 0); \ + (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \ +}) + +static bool +anv_ptr_free_list_pop(void **list, void **elem) +{ + void *current = *list; + while (PFL_PTR(current) != NULL) { + void **next_ptr = PFL_PTR(current); + void *new_ptr = VG_NOACCESS_READ(next_ptr); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(new_ptr, new_count); + void *old = __sync_val_compare_and_swap(list, current, new); + if (old == current) { + *elem = PFL_PTR(current); + return true; + } + current = old; + } + + return false; +} + +static void +anv_ptr_free_list_push(void **list, void *elem) +{ + void *old, *current; + void **next_ptr = elem; + + old = *list; + do { + current = old; + VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); + unsigned new_count = PFL_COUNT(current) + 1; + void *new = PFL_PACK(elem, new_count); + old = __sync_val_compare_and_swap(list, current, new); + } 
while (old != current); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size); + +void +anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size) +{ + assert(util_is_power_of_two(block_size)); + + pool->device = device; + pool->bo.gem_handle = 0; + pool->bo.offset = 0; + pool->block_size = block_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + anv_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + + /* Immediately grow the pool so we'll have a backing bo. */ + pool->state.next = 0; + pool->state.end = anv_block_pool_grow(pool, 0); +} + +void +anv_block_pool_finish(struct anv_block_pool *pool) +{ + struct anv_mmap_cleanup *cleanup; + + anv_vector_foreach(cleanup, &pool->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + if (cleanup->gem_handle) + anv_gem_close(pool->device, cleanup->gem_handle); + } + + anv_vector_finish(&pool->mmap_cleanups); + + close(pool->fd); +} + +static uint32_t +anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size) +{ + size_t size; + void *map; + int gem_handle; + struct anv_mmap_cleanup *cleanup; + + if (old_size == 0) { + size = 32 * pool->block_size; + } else { + size = old_size * 2; + } + + cleanup = anv_vector_add(&pool->mmap_cleanups); + if (!cleanup) + return 0; + *cleanup = ANV_MMAP_CLEANUP_INIT; + + if (old_size == 0) + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + + if (pool->fd == -1) + return 0; + + if (ftruncate(pool->fd, size) == -1) + return 0; + + /* First try to see if mremap can grow the map in place. */ + map = MAP_FAILED; + if (old_size > 0) + map = mremap(pool->map, old_size, size, 0); + if (map == MAP_FAILED) { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. 
MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, 0); + cleanup->map = map; + cleanup->size = size; + } + if (map == MAP_FAILED) + return 0; + + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) + return 0; + cleanup->gem_handle = gem_handle; + + /* Now that we successfull allocated everything, we can write the new + * values back into pool. */ + pool->map = map; + pool->bo.gem_handle = gem_handle; + pool->bo.size = size; + pool->bo.map = map; + pool->bo.index = 0; + + return size; +} + +uint32_t +anv_block_pool_alloc(struct anv_block_pool *pool) +{ + uint32_t offset; + struct anv_block_state state, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) { + assert(pool->map); + return offset; + } + + restart: + state.u64 = __sync_fetch_and_add(&pool->state.u64, pool->block_size); + if (state.next < state.end) { + assert(pool->map); + return state.next; + } else if (state.next == state.end) { + /* We allocated the first block outside the pool, we have to grow it. + * pool->next_block acts a mutex: threads who try to allocate now will + * get block indexes above the current limit and hit futex_wait + * below. */ + new.next = state.next + pool->block_size; + new.end = anv_block_pool_grow(pool, state.end); + assert(new.end > 0); + old.u64 = __sync_lock_test_and_set(&pool->state.u64, new.u64); + if (old.next != state.next) + futex_wake(&pool->state.end, INT_MAX); + return state.next; + } else { + futex_wait(&pool->state.end, state.end); + goto restart; + } +} + +void +anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset) +{ + anv_free_list_push(&pool->free_list, pool->map, offset); +} + +static void +anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool, + size_t state_size) +{ + /* At least a cache line and must divide the block size. 
*/ + assert(state_size >= 64 && util_is_power_of_two(state_size)); + + pool->state_size = state_size; + pool->free_list = ANV_FREE_LIST_EMPTY; + pool->block.next = 0; + pool->block.end = 0; +} + +static uint32_t +anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool) +{ + uint32_t offset; + struct anv_block_state block, old, new; + + /* Try free list first. */ + if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) + return offset; + + /* If free list was empty (or somebody raced us and took the items) we + * allocate a new item from the end of the block */ + restart: + block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size); + + if (block.next < block.end) { + return block.next; + } else if (block.next == block.end) { + offset = anv_block_pool_alloc(block_pool); + new.next = offset + pool->state_size; + new.end = offset + block_pool->block_size; + old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); + if (old.next != block.next) + futex_wake(&pool->block.end, INT_MAX); + return offset; + } else { + futex_wait(&pool->block.end, block.end); + goto restart; + } +} + +static void +anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool, + struct anv_block_pool *block_pool, + uint32_t offset) +{ + anv_free_list_push(&pool->free_list, block_pool->map, offset); +} + +void +anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool) +{ + pool->block_pool = block_pool; + for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) { + size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i); + anv_fixed_size_state_pool_init(&pool->buckets[i], size); + } + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +void +anv_state_pool_finish(struct anv_state_pool *pool) +{ + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +struct anv_state +anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align) +{ + unsigned size_log2 = ilog2_round_up(size < align ? 
align : size); + assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + if (size_log2 < ANV_MIN_STATE_SIZE_LOG2) + size_log2 = ANV_MIN_STATE_SIZE_LOG2; + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + struct anv_state state; + state.alloc_size = 1 << size_log2; + state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket], + pool->block_pool); + state.map = pool->block_pool->map + state.offset; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size)); + return state; +} + +/** + * Give \p state back to the bucket it came from. + * + * The bucket is derived from state.alloc_size, which must be a power of two + * between 2^ANV_MIN_STATE_SIZE_LOG2 and 2^ANV_MAX_STATE_SIZE_LOG2. + */ +void +anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state) +{ + assert(util_is_power_of_two(state.alloc_size)); + unsigned size_log2 = ilog2_round_up(state.alloc_size); + assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 && + size_log2 <= ANV_MAX_STATE_SIZE_LOG2); + unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2; + + VG(VALGRIND_MEMPOOL_FREE(pool, state.map)); + anv_fixed_size_state_pool_free(&pool->buckets[bucket], + pool->block_pool, state.offset); +} + +/* Marks the end of a state stream's chain of blocks: it is the initial value + * of stream->current_block and the value that stops the walk in + * anv_state_stream_finish(). */ +#define NULL_BLOCK 1 +/* Header written at the start of every block a state stream takes from the + * block pool; allocations are placed after it. */ +struct stream_block { + /* Offset of the block that was current before this one, or NULL_BLOCK. */ + uint32_t next; + + /* The map for the BO at the time the block was given to us */ + void *current_map; + +#ifdef HAVE_VALGRIND + /* Start of the Valgrind mempool chunk covering this block's allocations; + * NULL until the first allocation from the block. */ + void *_vg_ptr; +#endif +}; + +/* The state stream allocator is a one-shot, single threaded allocator for + * variable sized blocks. We use it for allocating dynamic state.
+ */ +void +anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool) +{ + stream->block_pool = block_pool; + stream->next = 0; + stream->end = 0; + stream->current_block = NULL_BLOCK; + + VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false)); +} + +void +anv_state_stream_finish(struct anv_state_stream *stream) +{ + struct stream_block *sb; + uint32_t block, next_block; + + block = stream->current_block; + while (block != NULL_BLOCK) { + sb = stream->block_pool->map + block; + next_block = VG_NOACCESS_READ(&sb->next); + VG(VALGRIND_MEMPOOL_FREE(stream, VG_NOACCESS_READ(&sb->_vg_ptr))); + anv_block_pool_free(stream->block_pool, block); + block = next_block; + } + + VG(VALGRIND_DESTROY_MEMPOOL(stream)); +} + +struct anv_state +anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment) +{ + struct stream_block *sb; + struct anv_state state; + uint32_t block; + + state.offset = align_u32(stream->next, alignment); + if (state.offset + size > stream->end) { + block = anv_block_pool_alloc(stream->block_pool); + void *current_map = stream->block_pool->map; + sb = current_map + block; + VG_NOACCESS_WRITE(&sb->current_map, current_map); + VG_NOACCESS_WRITE(&sb->next, stream->current_block); + VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0)); + stream->current_block = block; + stream->next = block + sizeof(*sb); + stream->end = block + stream->block_pool->block_size; + state.offset = align_u32(stream->next, alignment); + assert(state.offset + size <= stream->end); + } + + sb = stream->block_pool->map + stream->current_block; + void *current_map = VG_NOACCESS_READ(&sb->current_map); + + state.map = current_map + state.offset; + state.alloc_size = size; + +#ifdef HAVE_VALGRIND + void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr); + if (vg_ptr == NULL) { + vg_ptr = state.map; + VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr); + VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size); + } else { + ptrdiff_t vg_offset = vg_ptr - current_map; + 
assert(vg_offset >= stream->current_block && + vg_offset < stream->end); + VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, + (state.offset + size) - vg_offset); + } +#endif + + stream->next = state.offset + size; + + return state; +} + +/* Free-list node of the BO pool. anv_bo_pool_free() stores it at the start + * of the freed BO's own mapping (link = bo->map), so a free BO describes + * itself and no separate allocation is needed. */ +struct bo_pool_bo_link { + struct bo_pool_bo_link *next; + struct anv_bo bo; +}; + +/** + * Set up an empty pool that hands out BOs of \p bo_size bytes created on + * \p device. + */ +void +anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t bo_size) +{ + pool->device = device; + pool->bo_size = bo_size; + pool->free_list = NULL; + + VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); +} + +/** + * Unmap and close every BO that is currently on the pool's free list. + * BOs that have been handed out and not freed are not visited here. + */ +void +anv_bo_pool_finish(struct anv_bo_pool *pool) +{ + struct bo_pool_bo_link *link = PFL_PTR(pool->free_list); + while (link != NULL) { + /* Copy the link out first: it lives inside the mapping that is about + * to be unmapped. */ + struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); + + anv_gem_munmap(link_copy.bo.map, pool->bo_size); + anv_gem_close(pool->device, link_copy.bo.gem_handle); + link = link_copy.next; + } + + VG(VALGRIND_DESTROY_MEMPOOL(pool)); +} + +/** + * Get a mapped BO of pool->bo_size bytes in \p bo. + * + * A BO from the free list is reused when one is available; otherwise a new + * BO is created and mapped. + * + * \return VK_SUCCESS, the error returned by anv_bo_init_new(), or + * VK_ERROR_MEMORY_MAP_FAILED when the new BO cannot be mapped. + */ +VkResult +anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo) +{ + VkResult result; + + void *next_free_void; + if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) { + struct bo_pool_bo_link *next_free = next_free_void; + *bo = VG_NOACCESS_READ(&next_free->bo); + assert(bo->map == next_free); + assert(bo->size == pool->bo_size); + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; + } + + struct anv_bo new_bo; + + result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size); + if (result != VK_SUCCESS) + return result; + + assert(new_bo.size == pool->bo_size); + + new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size); + /* NOTE(review): assumes anv_gem_mmap() reports failure as NULL rather + * than MAP_FAILED - confirm against its implementation. */ + if (new_bo.map == NULL) { + anv_gem_close(pool->device, new_bo.gem_handle); + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + } + + *bo = new_bo; + + VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size)); + + return VK_SUCCESS; +} + +void +anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo) +{ + struct bo_pool_bo_link *link = bo->map; +
link->bo = *bo; + + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); + anv_ptr_free_list_push(&pool->free_list, link); +} diff --git a/src/vulkan/anv_aub.c b/src/vulkan/anv_aub.c new file mode 100644 index 00000000000..e4a35873590 --- /dev/null +++ b/src/vulkan/anv_aub.c @@ -0,0 +1,293 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/mman.h> + +#include <drm.h> +#include <i915_drm.h> + +#include "anv_private.h" +#include "anv_aub.h" + +struct anv_aub_writer { + FILE *file; + uint32_t offset; + int gen; +}; + +static void +aub_out(struct anv_aub_writer *writer, uint32_t data) +{ + fwrite(&data, 1, 4, writer->file); +} + +static void +aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size) +{ + fwrite(data, 1, size, writer->file); +} + +static struct anv_aub_writer * +get_anv_aub_writer(struct anv_device *device) +{ + struct anv_aub_writer *writer = device->aub_writer; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (geteuid() != getuid()) + return NULL; + + if (writer) + return writer; + + writer = malloc(sizeof(*writer)); + if (writer == NULL) + return NULL; + + filename = "intel.aub"; + writer->gen = device->info.gen; + writer->file = fopen(filename, "w+"); + if (!writer->file) { + free(writer); + return NULL; + } + + /* Start allocating objects from just after the GTT. */ + writer->offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(writer, CMD_AUB_HEADER | (13 - 2)); + aub_out(writer, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(writer, 0); /* app name */ + } + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* timestamp */ + aub_out(writer, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 
6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT_ENTRY | + AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, 0); /* subtype */ + aub_out(writer, 0); /* offset */ + aub_out(writer, gtt_size); /* size */ + if (writer->gen >= 8) + aub_out(writer, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(writer, entry); + } + + return device->aub_writer = writer; +} + +void +anv_aub_writer_destroy(struct anv_aub_writer *writer) +{ + fclose(writer->file); + free(writer); +} + + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type, + void *virtual, uint32_t size, uint32_t gtt_offset) +{ + uint32_t block_size; + uint32_t offset; + uint32_t subtype = 0; + static const char null_block[8 * 4096]; + + for (offset = 0; offset < size; offset += block_size) { + block_size = size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | + type | AUB_TRACE_OP_DATA_WRITE); + aub_out(writer, subtype); + aub_out(writer, gtt_offset + offset); + aub_out(writer, align_u32(block_size, 4)); + if (writer->gen >= 8) + aub_out(writer, 0); + + if (virtual) + aub_out_data(writer, (char *) virtual + offset, block_size); + else + aub_out_data(writer, null_block, block_size); + + /* Pad to a multiple of 4 bytes. 
*/ + aub_out_data(writer, null_block, -block_size & 3); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(struct anv_aub_writer *writer, + uint32_t batch_offset, uint32_t offset, + int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (writer->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_offset; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_offset; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(writer, + CMD_AUB_TRACE_HEADER_BLOCK | + ((writer->gen >= 8 ? 6 : 5) - 2)); + aub_out(writer, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(writer, 0); /* general/surface subtype */ + aub_out(writer, offset); + aub_out(writer, ring_count * 4); + if (writer->gen >= 8) + aub_out(writer, 0); + + /* FIXME: Need some flush operations here? 
*/ + aub_out_data(writer, ringbuffer, ring_count * 4); +} + +/* Book-keeping for one buffer object while a command buffer is dumped. */ +struct aub_bo { + uint32_t size; + uint32_t offset; /* GTT offset given to the BO in the trace */ + void *map; /* CPU mapping of the BO contents */ + void *relocated; /* contents with relocations applied; equal to + * map when no private copy was made */ +}; + +/** + * Apply the relocations of \p gem_obj to the contents of \p aub_bo. + * + * The BO itself is never modified: relocations are patched into a private + * copy that is stored in aub_bo->relocated and must be freed by the caller + * when it differs from aub_bo->map. When the object has no relocations, or + * the copy cannot be allocated, aub_bo->relocated is set to aub_bo->map so + * the unmodified contents are dumped instead. + * + * \param aub_bos array indexed by relocation target_handle, used to look up + * the trace offset of the relocation target. + */ +static void +relocate_bo(struct aub_bo *aub_bo, + const struct drm_i915_gem_exec_object2 *gem_obj, + struct aub_bo *aub_bos) +{ + /* relocs_ptr is a 64-bit integer in the kernel ABI; go through uintptr_t + * so the conversion is also well-formed where pointers are 32 bits. */ + const struct drm_i915_gem_relocation_entry *relocs = + (const struct drm_i915_gem_relocation_entry *) + (uintptr_t) gem_obj->relocs_ptr; + uint32_t *dw; + void *copy; + + /* Nothing to patch: dump the mapping as it is, without copying it. */ + aub_bo->relocated = aub_bo->map; + if (gem_obj->relocation_count == 0) + return; + + /* Best effort for a debug dump: fall back to the unrelocated contents + * rather than writing through a NULL pointer. */ + copy = malloc(aub_bo->size); + if (copy == NULL) + return; + + memcpy(copy, aub_bo->map, aub_bo->size); + for (size_t i = 0; i < gem_obj->relocation_count; i++) { + assert(relocs[i].offset < aub_bo->size); + dw = copy + relocs[i].offset; + *dw = aub_bos[relocs[i].target_handle].offset + relocs[i].delta; + } + aub_bo->relocated = copy; +} + +/** + * Append the buffers referenced by \p cmd_buffer to the device's AUB trace. + * + * Every BO in cmd_buffer->execbuf2 is given an offset starting at + * writer->offset, relocated against those offsets and written out; the last + * BO in the list is treated as the batch buffer and a ring buffer packet + * pointing at it is emitted afterwards. Does nothing when no AUB writer is + * available, the BO list is empty or the temporary array cannot be + * allocated. + */ +void +anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_aub_writer *writer; + struct anv_bo *bo; + uint32_t ring_flag = 0; + uint32_t offset; + struct aub_bo *aub_bos; + + writer = get_anv_aub_writer(device); + if (writer == NULL) + return; + + /* The batch is the last BO of the list, so an empty list has nothing to + * dump and would make the bo_count - 1 index below wrap around. */ + if (cmd_buffer->execbuf2.bo_count == 0) + return; + + aub_bos = malloc(cmd_buffer->execbuf2.bo_count * sizeof(aub_bos[0])); + if (aub_bos == NULL) + return; + + offset = writer->offset; + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { + bo = cmd_buffer->execbuf2.bos[i]; + if (bo->map) + aub_bos[i].map = bo->map; + else + aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size); + aub_bos[i].size = bo->size; + aub_bos[i].relocated = aub_bos[i].map; + aub_bos[i].offset = offset; + offset = align_u32(offset + bo->size + 4095, 4096); + } + + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) + relocate_bo(&aub_bos[i], &cmd_buffer->execbuf2.objects[i], aub_bos); + + struct aub_bo *batch_bo = &aub_bos[cmd_buffer->execbuf2.bo_count - 1]; + + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) { + bo = cmd_buffer->execbuf2.bos[i]; + if (&aub_bos[i] == batch_bo) { + aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); +
} else { + aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE, + aub_bos[i].relocated, + bo->size, aub_bos[i].offset); + } + if (aub_bos[i].relocated != aub_bos[i].map) + free(aub_bos[i].relocated); + if (aub_bos[i].map != bo->map) + anv_gem_munmap(aub_bos[i].map, bo->size); + } + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(writer, batch_bo->offset, offset, ring_flag); + + free(aub_bos); + + fflush(writer->file); +} diff --git a/src/vulkan/anv_aub.h b/src/vulkan/anv_aub.h new file mode 100644 index 00000000000..7a67712ff9c --- /dev/null +++ b/src/vulkan/anv_aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. 
It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) 
+#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), + AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), + AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), + AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), + AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), + AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), + AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), + AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), + AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), + AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), + + AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), + AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), + AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), + + AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), + AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), + AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), + AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), + AUB_TRACE_WM_CONSTANTS = 
ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), +}; + +#undef ENCODE_SS_TYPE + +/** + * Decode a aub_state_struct_type value to determine the type that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) +{ + return (ss_type & 0xFFFF0000) >> 16; +} + +/** + * Decode a state_struct_type value to determine the subtype that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) +{ + return ss_type & 0xFFFF; +} + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/src/vulkan/anv_batch_chain.c b/src/vulkan/anv_batch_chain.c new file mode 100644 index 00000000000..c34f58b2534 --- /dev/null +++ b/src/vulkan/anv_batch_chain.c @@ -0,0 +1,936 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/** \file anv_batch_chain.c + * + * This file contains functions related to anv_cmd_buffer as a data + * structure. This involves everything required to create and destroy + * the actual batch buffers as well as link them together and handle + * relocations and surface state. It specifically does *not* contain any + * handling of actual vkCmd calls beyond vkCmdExecuteCommands. + */ + +/*-----------------------------------------------------------------------* + * Functions related to anv_reloc_list + *-----------------------------------------------------------------------*/ + +static VkResult +anv_reloc_list_init_clone(struct anv_reloc_list *list, + struct anv_device *device, + const struct anv_reloc_list *other_list) +{ + if (other_list) { + list->num_relocs = other_list->num_relocs; + list->array_length = other_list->array_length; + } else { + list->num_relocs = 0; + list->array_length = 256; + } + + list->relocs = + anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list->reloc_bos = + anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + + if (list->reloc_bos == NULL) { + anv_device_free(device, list->relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (other_list) { + memcpy(list->relocs, other_list->relocs, + list->array_length * sizeof(*list->relocs)); + memcpy(list->reloc_bos, other_list->reloc_bos, + list->array_length * sizeof(*list->reloc_bos)); + } + + return VK_SUCCESS; +} + +VkResult +anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device) +{ + return anv_reloc_list_init_clone(list, device, NULL); +} + +void +anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device) 
+{ + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); +} + +/** + * Make sure \p list can take \p num_additional_relocs more entries. + * + * When the arrays are too small their length is doubled until the request + * fits, both parallel arrays (relocs and reloc_bos) are reallocated and the + * existing entries are copied over. + * + * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if either array could + * not be allocated; in that case \p list is left unchanged. + */ +static VkResult +anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device, + size_t num_additional_relocs) +{ + if (list->num_relocs + num_additional_relocs <= list->array_length) + return VK_SUCCESS; + + size_t new_length = list->array_length * 2; + while (new_length < list->num_relocs + num_additional_relocs) + new_length *= 2; + + struct drm_i915_gem_relocation_entry *new_relocs = + anv_device_alloc(device, new_length * sizeof(*list->relocs), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_relocs == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_reloc_bos = + anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8, + VK_SYSTEM_ALLOC_TYPE_INTERNAL); + /* Check the second allocation (not the first one again) and release the + * first array on failure so it does not leak. */ + if (new_reloc_bos == NULL) { + anv_device_free(device, new_relocs); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs)); + memcpy(new_reloc_bos, list->reloc_bos, + list->num_relocs * sizeof(*list->reloc_bos)); + + anv_device_free(device, list->relocs); + anv_device_free(device, list->reloc_bos); + + list->array_length = new_length; + list->relocs = new_relocs; + list->reloc_bos = new_reloc_bos; + + return VK_SUCCESS; +} + +/** + * Record a relocation at \p offset that points \p delta bytes into + * \p target_bo. + * + * \return target_bo->offset + delta, i.e. the address based on the BO's + * presumed offset. + */ +uint64_t +anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, uint32_t delta) +{ + struct drm_i915_gem_relocation_entry *entry; + int index; + + anv_reloc_list_grow(list, device, 1); + /* TODO: Handle failure */ + + /* XXX: Can we use I915_EXEC_HANDLE_LUT?
*/ + index = list->num_relocs++; + list->reloc_bos[index] = target_bo; + entry = &list->relocs[index]; + entry->target_handle = target_bo->gem_handle; + entry->delta = delta; + entry->offset = offset; + entry->presumed_offset = target_bo->offset; + entry->read_domains = 0; + entry->write_domain = 0; + + return target_bo->offset + delta; +} + +static void +anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device, + struct anv_reloc_list *other, uint32_t offset) +{ + anv_reloc_list_grow(list, device, other->num_relocs); + /* TODO: Handle failure */ + + memcpy(&list->relocs[list->num_relocs], &other->relocs[0], + other->num_relocs * sizeof(other->relocs[0])); + memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], + other->num_relocs * sizeof(other->reloc_bos[0])); + + for (uint32_t i = 0; i < other->num_relocs; i++) + list->relocs[i + list->num_relocs].offset += offset; + + list->num_relocs += other->num_relocs; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch + *-----------------------------------------------------------------------*/ + +void * +anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) +{ + if (batch->next + num_dwords * 4 > batch->end) + batch->extend_cb(batch, batch->user_data); + + void *p = batch->next; + + batch->next += num_dwords * 4; + assert(batch->next <= batch->end); + + return p; +} + +uint64_t +anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t delta) +{ + return anv_reloc_list_add(batch->relocs, batch->device, + location - batch->start, bo, delta); +} + +void +anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) +{ + uint32_t size, offset; + + size = other->next - other->start; + assert(size % 4 == 0); + + if (batch->next + size > batch->end) + batch->extend_cb(batch, batch->user_data); + + assert(batch->next + size <= batch->end); + + 
VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); + memcpy(batch->next, other->start, size); + + offset = batch->next - batch->start; + anv_reloc_list_append(batch->relocs, batch->device, + other->relocs, offset); + + batch->next += size; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static VkResult +anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init(&bbo->relocs, device); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} + +static VkResult +anv_batch_bo_clone(struct anv_device *device, + const struct anv_batch_bo *other_bbo, + struct anv_batch_bo **bbo_out) +{ + VkResult result; + + struct anv_batch_bo *bbo = + anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (bbo == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + + result = anv_reloc_list_init_clone(&bbo->relocs, device, &other_bbo->relocs); + if (result != VK_SUCCESS) + goto fail_bo_alloc; + + bbo->length = other_bbo->length; + memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); + + *bbo_out = bbo; + + return VK_SUCCESS; + + fail_bo_alloc: + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + fail_alloc: + anv_device_free(device, bbo); + + return result; +} 
+ +static void +anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->next = batch->start = bbo->bo.map; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; + bbo->relocs.num_relocs = 0; +} + +static void +anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, + size_t batch_padding) +{ + batch->start = bbo->bo.map; + batch->next = bbo->bo.map + bbo->length; + batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->relocs = &bbo->relocs; +} + +static void +anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) +{ + assert(batch->start == bbo->bo.map); + bbo->length = batch->next - batch->start; + VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); +} + +static void +anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device) +{ + anv_reloc_list_finish(&bbo->relocs, device); + anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo); + anv_device_free(device, bbo); +} + +static VkResult +anv_batch_bo_list_clone(const struct list_head *list, struct anv_device *device, + struct list_head *new_list) +{ + VkResult result = VK_SUCCESS; + + list_inithead(new_list); + + struct anv_batch_bo *prev_bbo = NULL; + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo *new_bbo; + result = anv_batch_bo_clone(device, bbo, &new_bbo); + if (result != VK_SUCCESS) + break; + list_addtail(&new_bbo->link, new_list); + + if (prev_bbo) { + /* As we clone this list of batch_bo's, they chain one to the + * other using MI_BATCH_BUFFER_START commands. We need to fix up + * those relocations as we go. Fortunately, this is pretty easy + * as it will always be the last relocation in the list. 
+ */ + uint32_t last_idx = prev_bbo->relocs.num_relocs - 1; + assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo); + prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo; + } + + prev_bbo = new_bbo; + } + + if (result != VK_SUCCESS) { + list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) + anv_batch_bo_destroy(bbo, device); + } + + return result; +} + +/*-----------------------------------------------------------------------* + * Functions related to anv_batch_bo + *-----------------------------------------------------------------------*/ + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); +} + +static inline struct anv_batch_bo * +anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer) +{ + return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link); +} + +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo; +} + +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer) +{ + return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs; +} + +static void +cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_batch_bo *bbo) +{ + struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch_bo *current_bbo = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + + /* We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. 
+ */ + batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + + anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &bbo->bo, 0 }, + ); + + anv_batch_bo_finish(current_bbo, batch); +} + +static VkResult +anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) +{ + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + + list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); + + anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + struct anv_bo *surface_bo = + anv_cmd_buffer_current_surface_bo(cmd_buffer); + struct anv_state state; + + state.offset = align_u32(cmd_buffer->surface_next, alignment); + if (state.offset + size > surface_bo->size) + return (struct anv_state) { 0 }; + + state.map = surface_bo->map + state.offset; + state.alloc_size = size; + cmd_buffer->surface_next = state.offset + size; + + assert(state.offset + size <= surface_bo->size); + + return state; +} + +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment) +{ + return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, + size, alignment); +} + +VkResult +anv_cmd_buffer_new_surface_state_bo(struct 
anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *new_bbo, *old_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + /* Finish off the old buffer */ + old_bbo->length = cmd_buffer->surface_next; + + VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo); + if (result != VK_SUCCESS) + return result; + + struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer->device); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + cmd_buffer->surface_next = 1; + + list_addtail(&new_bbo->link, &cmd_buffer->surface_bos); + + return VK_SUCCESS; +} + +VkResult +anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo, *surface_bbo; + struct anv_device *device = cmd_buffer->device; + VkResult result; + + list_inithead(&cmd_buffer->batch_bos); + list_inithead(&cmd_buffer->surface_bos); + + result = anv_batch_bo_create(device, &batch_bo); + if (result != VK_SUCCESS) + return result; + + list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); + + cmd_buffer->batch.device = device; + cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; + cmd_buffer->batch.user_data = cmd_buffer; + + anv_batch_bo_start(batch_bo, &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + result = anv_batch_bo_create(device, &surface_bbo); + if (result != VK_SUCCESS) + goto fail_batch_bo; + + list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos); + + int success = anv_vector_init(&cmd_buffer->seen_bbos, + sizeof(struct anv_bo *), + 8 * sizeof(struct anv_bo *)); + if (!success) + goto fail_surface_bo; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo; + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = surface_bbo; + + /* Start surface_next at 1 so surface offset 0 is invalid. 
*/ + cmd_buffer->surface_next = 1; + + cmd_buffer->execbuf2.objects = NULL; + cmd_buffer->execbuf2.bos = NULL; + cmd_buffer->execbuf2.array_length = 0; + + return VK_SUCCESS; + + fail_surface_bo: + anv_batch_bo_destroy(surface_bbo, device); + fail_batch_bo: + anv_batch_bo_destroy(batch_bo, device); + + return result; +} + +void +anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + anv_vector_finish(&cmd_buffer->seen_bbos); + + /* Destroy all of the batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->batch_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + /* Destroy all of the surface state buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->surface_bos, link) { + anv_batch_bo_destroy(bbo, device); + } + + anv_device_free(device, cmd_buffer->execbuf2.objects); + anv_device_free(device, cmd_buffer->execbuf2.bos); +} + +void +anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), + &cmd_buffer->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + /* Delete all but the first batch bo */ + assert(!list_empty(&cmd_buffer->batch_bos)); + while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) { + struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer); + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, device); + } + assert(!list_empty(&cmd_buffer->batch_bos)); + + 
anv_cmd_buffer_current_surface_bbo(cmd_buffer)->relocs.num_relocs = 0; + + cmd_buffer->surface_next = 1; + + /* Reset the list of seen buffers */ + cmd_buffer->seen_bbos.head = 0; + cmd_buffer->seen_bbos.tail = 0; + + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_batch_bo(cmd_buffer); + *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); +} + +void +anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); + struct anv_batch_bo *surface_bbo = + anv_cmd_buffer_current_surface_bbo(cmd_buffer); + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END); + + /* Round batch up to an even number of dwords. */ + if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP); + + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; + } + + anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); + + surface_bbo->length = cmd_buffer->surface_next; + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + /* If this is a secondary command buffer, we need to determine the + * mode in which it will be executed with vkExecuteCommands. We + * determine this statically here so that this stays in sync with the + * actual ExecuteCommands implementation. + */ + if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && + (anv_cmd_buffer_current_batch_bo(cmd_buffer)->length < + ANV_CMD_BUFFER_BATCH_SIZE / 2)) { + /* If the secondary has exactly one batch buffer in its list *and* + * that batch buffer is less than half of the maximum size, we're + * probably better of simply copying it into our batch. 
+ */ + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; + } else if (cmd_buffer->opt_flags & + VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT) { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; + + /* For chaining mode, we need to increment the number of + * relocations. This is because, when we chain, we need to add + * an MI_BATCH_BUFFER_START command. Adding this command will + * also add a relocation. In order to handle theis we'll + * increment it here and decrement it right before adding the + * MI_BATCH_BUFFER_START command. + */ + anv_cmd_buffer_current_batch_bo(cmd_buffer)->relocs.num_relocs++; + } else { + cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; + } + } +} + +static inline VkResult +anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, + struct list_head *list) +{ + list_for_each_entry(struct anv_batch_bo, bbo, list, link) { + struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos); + if (bbo_ptr == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *bbo_ptr = bbo; + } + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary) +{ + switch (secondary->exec_mode) { + case ANV_CMD_BUFFER_EXEC_MODE_EMIT: + anv_batch_emit_batch(&primary->batch, &secondary->batch); + break; + case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { + struct anv_batch_bo *first_bbo = + list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); + + anv_batch_emit(&primary->batch, GEN8_MI_BATCH_BUFFER_START, + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &first_bbo->bo, 0 }, + ); + + struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); + assert(primary->batch.start == this_bbo->bo.map); + uint32_t offset = primary->batch.next 
- primary->batch.start; + + struct GEN8_MI_BATCH_BUFFER_START ret = { + GEN8_MI_BATCH_BUFFER_START_header, + ._2ndLevelBatchBuffer = _1stlevelbatch, + .AddressSpaceIndicator = ASI_PPGTT, + .BatchBufferStartAddress = { &this_bbo->bo, offset }, + }; + /* The pack function below is going to insert a relocation. In order + * to allow us to splice this secondary into a primary multiple + * times, we can't have relocations from previous splices in this + * splice. In order to deal with this, we simply decrement the + * relocation count prior to inserting the next one. In order to + * handle the base case, num_relocs was artificially incremented in + * end_batch_buffer(). + */ + last_bbo->relocs.num_relocs--; + GEN8_MI_BATCH_BUFFER_START_pack(&secondary->batch, + last_bbo->bo.map + last_bbo->length, + &ret); + + anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); + break; + } + case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { + struct list_head copy_list; + VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, + secondary->device, + ©_list); + if (result != VK_SUCCESS) + return; /* FIXME */ + + anv_cmd_buffer_add_seen_bbos(primary, ©_list); + + struct anv_batch_bo *first_bbo = + list_first_entry(©_list, struct anv_batch_bo, link); + struct anv_batch_bo *last_bbo = + list_last_entry(©_list, struct anv_batch_bo, link); + + cmd_buffer_chain_to_batch_bo(primary, first_bbo); + + list_splicetail(©_list, &primary->batch_bos); + + anv_batch_bo_continue(last_bbo, &primary->batch, + GEN8_MI_BATCH_BUFFER_START_length * 4); + + anv_cmd_buffer_emit_state_base_address(primary); + break; + } + default: + assert(!"Invalid execution mode"); + } + + /* Mark the surface buffer from the secondary as seen */ + anv_cmd_buffer_add_seen_bbos(primary, &secondary->surface_bos); +} + +static VkResult +anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *bo, + struct anv_reloc_list *relocs) +{ + struct drm_i915_gem_exec_object2 *obj = NULL; + + if (bo->index 
< cmd_buffer->execbuf2.bo_count && + cmd_buffer->execbuf2.bos[bo->index] == bo) + obj = &cmd_buffer->execbuf2.objects[bo->index]; + + if (obj == NULL) { + /* We've never seen this one before. Add it to the list and assign + * an id that we can use later. + */ + if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) { + uint32_t new_len = cmd_buffer->execbuf2.objects ? + cmd_buffer->execbuf2.array_length * 2 : 64; + + struct drm_i915_gem_exec_object2 *new_objects = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct anv_bo **new_bos = + anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL); + if (new_objects == NULL) { + anv_device_free(cmd_buffer->device, new_objects); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + if (cmd_buffer->execbuf2.objects) { + memcpy(new_objects, cmd_buffer->execbuf2.objects, + cmd_buffer->execbuf2.bo_count * sizeof(*new_objects)); + memcpy(new_bos, cmd_buffer->execbuf2.bos, + cmd_buffer->execbuf2.bo_count * sizeof(*new_bos)); + } + + cmd_buffer->execbuf2.objects = new_objects; + cmd_buffer->execbuf2.bos = new_bos; + cmd_buffer->execbuf2.array_length = new_len; + } + + assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length); + + bo->index = cmd_buffer->execbuf2.bo_count++; + obj = &cmd_buffer->execbuf2.objects[bo->index]; + cmd_buffer->execbuf2.bos[bo->index] = bo; + + obj->handle = bo->gem_handle; + obj->relocation_count = 0; + obj->relocs_ptr = 0; + obj->alignment = 0; + obj->offset = bo->offset; + obj->flags = 0; + obj->rsvd1 = 0; + obj->rsvd2 = 0; + } + + if (relocs != NULL && obj->relocation_count == 0) { + /* This is the first time we've ever seen a list of relocations for + * this BO. Go ahead and set the relocations and then walk the list + * of relocations and add them all. 
+ */ + obj->relocation_count = relocs->num_relocs; + obj->relocs_ptr = (uintptr_t) relocs->relocs; + + for (size_t i = 0; i < relocs->num_relocs; i++) + anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL); + } + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, + struct anv_reloc_list *list) +{ + struct anv_bo *bo; + + /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in + * struct drm_i915_gem_exec_object2 against the bos current offset and if + * all bos haven't moved it will skip relocation processing alltogether. + * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming + * value of offset so we can set it either way. For that to work we need + * to make sure all relocs use the same presumed offset. + */ + + for (size_t i = 0; i < list->num_relocs; i++) { + bo = list->reloc_bos[i]; + if (bo->offset != list->relocs[i].presumed_offset) + cmd_buffer->execbuf2.need_reloc = true; + + list->relocs[i].target_handle = bo->index; + } +} + +void +anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_batch *batch = &cmd_buffer->batch; + + cmd_buffer->execbuf2.bo_count = 0; + cmd_buffer->execbuf2.need_reloc = false; + + /* First, we walk over all of the bos we've seen and add them and their + * relocations to the validate list. + */ + struct anv_batch_bo **bbo; + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs); + + struct anv_batch_bo *first_batch_bo = + list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); + + /* The kernel requires that the last entry in the validation list be the + * batch buffer to execute. We can simply swap the element + * corresponding to the first batch_bo in the chain with the last + * element in the list. 
+ */ + if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) { + uint32_t idx = first_batch_bo->bo.index; + + struct drm_i915_gem_exec_object2 tmp_obj = + cmd_buffer->execbuf2.objects[idx]; + assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo); + + cmd_buffer->execbuf2.objects[idx] = + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx] = + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1]; + cmd_buffer->execbuf2.bos[idx]->index = idx; + + cmd_buffer->execbuf2.objects[cmd_buffer->execbuf2.bo_count - 1] = tmp_obj; + cmd_buffer->execbuf2.bos[cmd_buffer->execbuf2.bo_count - 1] = + &first_batch_bo->bo; + first_batch_bo->bo.index = cmd_buffer->execbuf2.bo_count - 1; + } + + /* Now we go through and fixup all of the relocation lists to point to + * the correct indices in the object array. We have to do this after we + * reorder the list above as some of the indices may have changed. + */ + anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) + anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); + + cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects, + .buffer_count = cmd_buffer->execbuf2.bo_count, + .batch_start_offset = 0, + .batch_len = batch->next - batch->start, + .cliprects_ptr = 0, + .num_cliprects = 0, + .DR1 = 0, + .DR4 = 0, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, + .rsvd1 = cmd_buffer->device->context_id, + .rsvd2 = 0, + }; + + if (!cmd_buffer->execbuf2.need_reloc) + cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC; +} diff --git a/src/vulkan/anv_cmd_buffer.c b/src/vulkan/anv_cmd_buffer.c new file mode 100644 index 00000000000..5178f6529ab --- /dev/null +++ b/src/vulkan/anv_cmd_buffer.c @@ -0,0 +1,1433 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +/** \file anv_cmd_buffer.c + * + * This file contains all of the stuff for emitting commands into a command + * buffer. This includes implementations of most of the vkCmd* + * entrypoints. This file is concerned entirely with state emission and + * not with the command buffer data structure itself. As far as this file + * is concerned, most of anv_cmd_buffer is magic. 
+ */ + +static void +anv_cmd_state_init(struct anv_cmd_state *state) +{ + state->rs_state = NULL; + state->vp_state = NULL; + state->cb_state = NULL; + state->ds_state = NULL; + memset(&state->state_vf, 0, sizeof(state->state_vf)); + memset(&state->descriptors, 0, sizeof(state->descriptors)); + + state->dirty = 0; + state->vb_dirty = 0; + state->descriptors_dirty = 0; + state->pipeline = NULL; + state->vp_state = NULL; + state->rs_state = NULL; + state->ds_state = NULL; +} + +VkResult anv_CreateCommandBuffer( + VkDevice _device, + const VkCmdBufferCreateInfo* pCreateInfo, + VkCmdBuffer* pCmdBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool); + struct anv_cmd_buffer *cmd_buffer; + VkResult result; + + cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (cmd_buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cmd_buffer->device = device; + + result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer); + if (result != VK_SUCCESS) + goto fail; + + anv_state_stream_init(&cmd_buffer->surface_state_stream, + &device->surface_state_block_pool); + anv_state_stream_init(&cmd_buffer->dynamic_state_stream, + &device->dynamic_state_block_pool); + + cmd_buffer->level = pCreateInfo->level; + cmd_buffer->opt_flags = 0; + + anv_cmd_state_init(&cmd_buffer->state); + + if (pool) { + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + } else { + /* Init the pool_link so we can safefly call list_del when we destroy + * the command buffer + */ + list_inithead(&cmd_buffer->pool_link); + } + + *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer); + + return VK_SUCCESS; + + fail: anv_device_free(device, cmd_buffer); + + return result; +} + +VkResult anv_DestroyCommandBuffer( + VkDevice _device, + VkCmdBuffer _cmd_buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer); + + 
list_del(&cmd_buffer->pool_link); + + anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer); + + anv_state_stream_finish(&cmd_buffer->surface_state_stream); + anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); + anv_device_free(device, cmd_buffer); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandBuffer( + VkCmdBuffer cmdBuffer, + VkCmdBufferResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer); + + anv_cmd_state_init(&cmd_buffer->state); + + return VK_SUCCESS; +} + +void +anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_bo *scratch_bo = NULL; + + cmd_buffer->state.scratch_size = + anv_block_pool_size(&device->scratch_block_pool); + if (cmd_buffer->state.scratch_size > 0) + scratch_bo = &device->scratch_block_pool.bo; + + anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS, + .GeneralStateBaseAddress = { scratch_bo, 0 }, + .GeneralStateMemoryObjectControlState = GEN8_MOCS, + .GeneralStateBaseAddressModifyEnable = true, + .GeneralStateBufferSize = 0xfffff, + .GeneralStateBufferSizeModifyEnable = true, + + .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 }, + .SurfaceStateMemoryObjectControlState = GEN8_MOCS, + .SurfaceStateBaseAddressModifyEnable = true, + + .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 }, + .DynamicStateMemoryObjectControlState = GEN8_MOCS, + .DynamicStateBaseAddressModifyEnable = true, + .DynamicStateBufferSize = 0xfffff, + .DynamicStateBufferSizeModifyEnable = true, + + .IndirectObjectBaseAddress = { NULL, 0 }, + .IndirectObjectMemoryObjectControlState = GEN8_MOCS, + .IndirectObjectBaseAddressModifyEnable = true, + .IndirectObjectBufferSize = 0xfffff, + .IndirectObjectBufferSizeModifyEnable = true, + + .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 }, + .InstructionMemoryObjectControlState = 
GEN8_MOCS, + .InstructionBaseAddressModifyEnable = true, + .InstructionBufferSize = 0xfffff, + .InstructionBuffersizeModifyEnable = true); + + /* After re-setting the surface state base address, we have to do some + * cache flusing so that the sampler engine will pick up the new + * SURFACE_STATE objects and binding tables. From the Broadwell PRM, + * Shared Function > 3D Sampler > State > State Caching (page 96): + * + * Coherency with system memory in the state cache, like the texture + * cache is handled partially by software. It is expected that the + * command stream or shader will issue Cache Flush operation or + * Cache_Flush sampler message to ensure that the L1 cache remains + * coherent with system memory. + * + * [...] + * + * Whenever the value of the Dynamic_State_Base_Addr, + * Surface_State_Base_Addr are altered, the L1 state cache must be + * invalidated to ensure the new surface or sampler state is fetched + * from system memory. + * + * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit + * which, according the PIPE_CONTROL instruction documentation in the + * Broadwell PRM: + * + * Setting this bit is independent of any other bit in this packet. + * This bit controls the invalidation of the L1 and L2 state caches + * at the top of the pipe i.e. at the parsing time. + * + * Unfortunately, experimentation seems to indicate that state cache + * invalidation through a PIPE_CONTROL does nothing whatsoever in + * regards to surface state and binding tables. In stead, it seems that + * invalidating the texture cache is what is actually needed. + * + * XXX: As far as we have been able to determine through + * experimentation, shows that flush the texture cache appears to be + * sufficient. The theory here is that all of the sampling/rendering + * units cache the binding table in the texture cache. However, we have + * yet to be able to actually confirm this. 
+ */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .TextureCacheInvalidationEnable = true); +} + +VkResult anv_BeginCommandBuffer( + VkCmdBuffer cmdBuffer, + const VkCmdBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + cmd_buffer->opt_flags = pBeginInfo->flags; + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) { + cmd_buffer->state.framebuffer = + anv_framebuffer_from_handle(pBeginInfo->framebuffer); + cmd_buffer->state.pass = + anv_render_pass_from_handle(pBeginInfo->renderPass); + + /* FIXME: We shouldn't be starting on the first subpass */ + anv_cmd_buffer_begin_subpass(cmd_buffer, + &cmd_buffer->state.pass->subpasses[0]); + } + + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + cmd_buffer->state.current_pipeline = UINT32_MAX; + + return VK_SUCCESS; +} + +VkResult anv_EndCommandBuffer( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) { + /* The algorithm used to compute the validate list is not threadsafe as + * it uses the bo->index field. We have to lock the device around it. + * Fortunately, the chances for contention here are probably very low. 
+ */ + pthread_mutex_lock(&device->mutex); + anv_cmd_buffer_prepare_execbuf(cmd_buffer); + pthread_mutex_unlock(&device->mutex); + } + + return VK_SUCCESS; +} + +void anv_CmdBindPipeline( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->state.pipeline = pipeline; + cmd_buffer->state.vb_dirty |= pipeline->vb_used; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + default: + assert(!"invalid bind point"); + break; + } +} + +void anv_CmdBindDynamicViewportState( + VkCmdBuffer cmdBuffer, + VkDynamicViewportState dynamicViewportState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState); + + cmd_buffer->state.vp_state = vp_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY; +} + +void anv_CmdBindDynamicRasterState( + VkCmdBuffer cmdBuffer, + VkDynamicRasterState dynamicRasterState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState); + + cmd_buffer->state.rs_state = rs_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY; +} + +void anv_CmdBindDynamicColorBlendState( + VkCmdBuffer cmdBuffer, + VkDynamicColorBlendState dynamicColorBlendState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState); + + cmd_buffer->state.cb_state = cb_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY; +} + +void anv_CmdBindDynamicDepthStencilState( + VkCmdBuffer cmdBuffer, + VkDynamicDepthStencilState 
dynamicDepthStencilState) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState); + + cmd_buffer->state.ds_state = ds_state; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY; +} + +void anv_CmdBindDescriptorSets( + VkCmdBuffer cmdBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t setCount, + const VkDescriptorSet* pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t* pDynamicOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout); + struct anv_descriptor_set_layout *set_layout; + + assert(firstSet + setCount < MAX_SETS); + + uint32_t dynamic_slot = 0; + for (uint32_t i = 0; i < setCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + set_layout = layout->set[firstSet + i].layout; + + cmd_buffer->state.descriptors[firstSet + i].set = set; + + assert(set_layout->num_dynamic_buffers < + ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets)); + memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets, + pDynamicOffsets + dynamic_slot, + set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets)); + + cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages; + + dynamic_slot += set_layout->num_dynamic_buffers; + } +} + +void anv_CmdBindIndexBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + static const uint32_t vk_to_gen_index_type[] = { + [VK_INDEX_TYPE_UINT16] = INDEX_WORD, + [VK_INDEX_TYPE_UINT32] = INDEX_DWORD, + }; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? 
UINT16_MAX : UINT32_MAX, + }; + GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf); + + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY; + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER, + .IndexFormat = vk_to_gen_index_type[indexType], + .MemoryObjectControlState = GEN8_MOCS, + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset); +} + +void anv_CmdBindVertexBuffers( + VkCmdBuffer cmdBuffer, + uint32_t startBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(startBinding + bindingCount < MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]); + vb[startBinding + i].offset = pOffsets[i]; + cmd_buffer->state.vb_dirty |= 1 << (startBinding + i); + } +} + +static VkResult +cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *bt_state) +{ + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; + struct anv_subpass *subpass = cmd_buffer->state.subpass; + struct anv_pipeline_layout *layout; + uint32_t attachments, bias, size; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + if (stage == VK_SHADER_STAGE_FRAGMENT) { + bias = MAX_RTS; + attachments = subpass->color_count; + } else { + bias = 0; + attachments = 0; + } + + /* This is a little awkward: layout can be NULL but we still have to + * allocate and set a binding table for the PS stage for render + * targets. */ + uint32_t surface_count = layout ? 
layout->stage[stage].surface_count : 0; + + if (attachments + surface_count == 0) + return VK_SUCCESS; + + size = (bias + surface_count) * sizeof(uint32_t); + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; + + if (bt_state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + /* This is highly annoying. The Vulkan spec puts the depth-stencil + * attachments in with the color attachments. Unfortunately, thanks to + * other aspects of the API, we cana't really saparate them before this + * point. Therefore, we have to walk all of the attachments but only + * put the color attachments into the binding table. + */ + for (uint32_t a = 0; a < attachments; a++) { + const struct anv_attachment_view *attachment = + fb->attachments[subpass->color_attachments[a]]; + + assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR); + const struct anv_color_attachment_view *view = + (const struct anv_color_attachment_view *)attachment; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + memcpy(state.map, view->view.surface_state.map, 64); + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->view.bo, view->view.offset); + + bt_map[a] = state.offset; + } + + if (layout == NULL) + return VK_SUCCESS; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *surface_slots = + set_layout->stage[stage].surface_start; + + uint32_t start = bias + layout->set[set].surface_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) { + 
struct anv_surface_view *view = + d->set->descriptors[surface_slots[b].index].view; + + if (!view) + continue; + + struct anv_state state = + anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64); + + if (state.map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + uint32_t offset; + if (surface_slots[b].dynamic_slot >= 0) { + uint32_t dynamic_offset = + d->dynamic_offsets[surface_slots[b].dynamic_slot]; + + offset = view->offset + dynamic_offset; + anv_fill_buffer_surface_state(state.map, view->format, offset, + view->range - dynamic_offset); + } else { + offset = view->offset; + memcpy(state.map, view->surface_state.map, 64); + } + + /* The address goes in dwords 8 and 9 of the SURFACE_STATE */ + *(uint64_t *)(state.map + 8 * 4) = + anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer), + cmd_buffer->device, + state.offset + 8 * 4, + view->bo, offset); + + bt_map[start + b] = state.offset; + } + } + + return VK_SUCCESS; +} + +static VkResult +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) +{ + struct anv_pipeline_layout *layout; + uint32_t sampler_count; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->state.compute_pipeline->layout; + else + layout = cmd_buffer->state.pipeline->layout; + + sampler_count = layout ? 
layout->stage[stage].sampler_count : 0; + if (sampler_count == 0) + return VK_SUCCESS; + + uint32_t size = sampler_count * 16; + *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32); + + if (state->map == NULL) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + for (uint32_t set = 0; set < layout->num_sets; set++) { + struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set]; + struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; + struct anv_descriptor_slot *sampler_slots = + set_layout->stage[stage].sampler_start; + + uint32_t start = layout->set[set].sampler_start[stage]; + + for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) { + struct anv_sampler *sampler = + d->set->descriptors[sampler_slots[b].index].sampler; + + if (!sampler) + continue; + + memcpy(state->map + (start + b) * 16, + sampler->state, sizeof(sampler->state)); + } + } + + return VK_SUCCESS; +} + +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) +{ + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + 
GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; +} + +static void +flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) +{ + uint32_t s, dirty = cmd_buffer->state.descriptors_dirty & + cmd_buffer->state.pipeline->active_stages; + + VkResult result = VK_SUCCESS; + for_each_bit(s, dirty) { + result = flush_descriptor_set(cmd_buffer, s); + if (result != VK_SUCCESS) + break; + } + + if (result != VK_SUCCESS) { + assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); + + /* Re-emit state base addresses so we get the new surface state base + * address before we start emitting binding tables etc. 
+ */ + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + /* Re-emit all active binding tables */ + for_each_bit(s, cmd_buffer->state.pipeline->active_stages) { + result = flush_descriptor_set(cmd_buffer, s); + + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } + } + + cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages; +} + +static struct anv_state +anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + memcpy(state.map, a, dwords * 4); + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4)); + + return state; +} + +static struct anv_state +anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, + uint32_t *a, uint32_t *b, + uint32_t dwords, uint32_t alignment) +{ + struct anv_state state; + uint32_t *p; + + state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + dwords * 4, alignment); + p = state.map; + for (uint32_t i = 0; i < dwords; i++) + p[i] = a[i] | b[i]; + + VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); + + return state; +} + +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + 
.SamplerCount = 0,
+      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
+   };
+
+   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
+   struct anv_state state =
+      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
+
+   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
+                  .InterfaceDescriptorTotalLength = size,
+                  .InterfaceDescriptorDataStartAddress = state.offset);
+
+   return VK_SUCCESS;
+}
+
+/* Bring the compute state recorded in cmd_buffer up to date before a
+ * dispatch is emitted.
+ *
+ * Switches PIPELINE_SELECT to GPGPU if another pipeline is current, replays
+ * the compute pipeline's batch when the pipeline is dirty, and re-emits the
+ * compute descriptor set when either the compute descriptors or the
+ * pipeline are dirty.  compute_dirty is cleared on exit.
+ *
+ * Failure of flush_compute_descriptor_set() is only caught by assert().
+ */
+static void
+anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+   VkResult result;
+
+   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+
+   if (cmd_buffer->state.current_pipeline != GPGPU) {
+      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+                     .PipelineSelection = GPGPU);
+      cmd_buffer->state.current_pipeline = GPGPU;
+   }
+
+   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
+      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+
+   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
+       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
+      result = flush_compute_descriptor_set(cmd_buffer);
+      assert(result == VK_SUCCESS);
+      /* descriptors_dirty is a mask of VK_SHADER_STAGE_*_BIT values, so the
+       * bit tested above is the one to clear.  VK_SHADER_STAGE_COMPUTE is
+       * the stage index used for per-stage arrays, not a mask; negating it
+       * would leave the compute bit set and clear other stages' bits.
+       */
+      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+   }
+
+   cmd_buffer->state.compute_dirty = 0;
+}
+
+static void
+anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+   uint32_t *p;
+
+   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
+
+   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
+
+   if (cmd_buffer->state.current_pipeline != _3D) {
+      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
+                     .PipelineSelection = _3D);
+      cmd_buffer->state.current_pipeline = _3D;
+   }
+
+   if (vb_emit) {
+      const uint32_t num_buffers =
__builtin_popcount(vb_emit); + const uint32_t num_dwords = 1 + num_buffers * 4; + + p = anv_batch_emitn(&cmd_buffer->batch, num_dwords, + GEN8_3DSTATE_VERTEX_BUFFERS); + uint32_t vb, i = 0; + for_each_bit(vb, vb_emit) { + struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer; + uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset; + + struct GEN8_VERTEX_BUFFER_STATE state = { + .VertexBufferIndex = vb, + .MemoryObjectControlState = GEN8_MOCS, + .AddressModifyEnable = true, + .BufferPitch = pipeline->binding_stride[vb], + .BufferStartingAddress = { buffer->bo, buffer->offset + offset }, + .BufferSize = buffer->size - offset + }; + + GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state); + i++; + } + } + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) { + /* If somebody compiled a pipeline after starting a command buffer the + * scratch bo may have grown since we started this cmd buffer (and + * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, + * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. 
*/ + if (cmd_buffer->state.scratch_size < pipeline->total_scratch) + anv_cmd_buffer_emit_state_base_address(cmd_buffer); + + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + } + + if (cmd_buffer->state.descriptors_dirty) + flush_descriptor_sets(cmd_buffer); + + if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) { + struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state; + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS, + .ScissorRectPointer = vp_state->scissor.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + .CCViewportPointer = vp_state->cc_vp.offset); + anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + .SFClipViewportPointer = vp_state->sf_clip_vp.offset); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_RS_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_sf, + pipeline->state_sf); + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.rs_state->state_raster, + pipeline->state_raster); + } + + if (cmd_buffer->state.ds_state && + (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY))) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.ds_state->state_wm_depth_stencil, + pipeline->state_wm_depth_stencil); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY | + ANV_CMD_BUFFER_DS_DIRTY)) { + struct anv_state state; + if (cmd_buffer->state.ds_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else if (cmd_buffer->state.cb_state == NULL) + state = anv_cmd_buffer_emit_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + else + state = anv_cmd_buffer_merge_dynamic(cmd_buffer, + cmd_buffer->state.ds_state->state_color_calc, + 
cmd_buffer->state.cb_state->state_color_calc, + GEN8_COLOR_CALC_STATE_length, 64); + + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_CC_STATE_POINTERS, + .ColorCalcStatePointer = state.offset, + .ColorCalcStatePointerValid = true); + } + + if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | + ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) { + anv_batch_emit_merge(&cmd_buffer->batch, + cmd_buffer->state.state_vf, pipeline->state_vf); + } + + cmd_buffer->state.vb_dirty &= ~vb_emit; + cmd_buffer->state.dirty = 0; +} + +void anv_CmdDraw( + VkCmdBuffer cmdBuffer, + uint32_t firstVertex, + uint32_t vertexCount, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = SEQUENTIAL, + .VertexCountPerInstance = vertexCount, + .StartVertexLocation = firstVertex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = 0); +} + +void anv_CmdDrawIndexed( + VkCmdBuffer cmdBuffer, + uint32_t firstIndex, + uint32_t indexCount, + int32_t vertexOffset, + uint32_t firstInstance, + uint32_t instanceCount) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .VertexAccessType = RANDOM, + .VertexCountPerInstance = indexCount, + .StartVertexLocation = firstIndex, + .InstanceCount = instanceCount, + .StartInstanceLocation = firstInstance, + .BaseVertexLocation = vertexOffset); +} + +static void +anv_batch_lrm(struct anv_batch *batch, + uint32_t reg, struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); +} + +static void +anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM, + .RegisterOffset = 
reg, + .DataDWord = imm); +} + +/* Auto-Draw / Indirect Registers */ +#define GEN7_3DPRIM_END_OFFSET 0x2420 +#define GEN7_3DPRIM_START_VERTEX 0x2430 +#define GEN7_3DPRIM_VERTEX_COUNT 0x2434 +#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438 +#define GEN7_3DPRIM_START_INSTANCE 0x243C +#define GEN7_3DPRIM_BASE_VERTEX 0x2440 + +void anv_CmdDrawIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12); + anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = SEQUENTIAL); +} + +void anv_CmdDrawIndexedIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t count, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12); + anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16); 
+ + anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE, + .IndirectParameterEnable = true, + .VertexAccessType = RANDOM); +} + +void anv_CmdDispatch( + VkCmdBuffer cmdBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = pipeline->cs_right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); +} + +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +void anv_CmdDispatchIndirect( + VkCmdBuffer cmdBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .SIMDSize = prog_data->simd_size / 16, + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = 
pipeline->cs_thread_width_max,
+                  .RightExecutionMask = pipeline->cs_right_mask,
+                  .BottomExecutionMask = 0xffffffff);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
+}
+
+void anv_CmdSetEvent(
+    VkCmdBuffer                                 cmdBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
+{
+   stub();
+}
+
+void anv_CmdResetEvent(
+    VkCmdBuffer                                 cmdBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
+{
+   stub();
+}
+
+void anv_CmdWaitEvents(
+    VkCmdBuffer                                 cmdBuffer,
+    uint32_t                                    eventCount,
+    const VkEvent*                              pEvents,
+    VkPipelineStageFlags                        srcStageMask,
+    VkPipelineStageFlags                        destStageMask,
+    uint32_t                                    memBarrierCount,
+    const void* const*                          ppMemBarriers)
+{
+   stub();
+}
+
+/* Record a pipeline barrier as a single GEN8 PIPE_CONTROL.
+ *
+ * srcStageMask selects the stall bits: the graphics stages set
+ * StallAtPixelScoreboard, while compute/transfer/transition set
+ * CommandStreamerStallEnable.  Every bit is consumed with anv_clear_mask()
+ * and the mask is asserted to be empty afterwards.  destStageMask is
+ * ignored and byRegion is not consulted.
+ *
+ * Each of the memBarrierCount entries of ppMemBarriers is dispatched on its
+ * sType and its outputMask/inputMask are OR'ed together.  The accumulated
+ * output flags choose which caches to flush; the accumulated input flags
+ * choose which caches to invalidate.
+ */
+void anv_CmdPipelineBarrier(
+    VkCmdBuffer                                 cmdBuffer,
+    VkPipelineStageFlags                        srcStageMask,
+    VkPipelineStageFlags                        destStageMask,
+    VkBool32                                    byRegion,
+    uint32_t                                    memBarrierCount,
+    const void* const*                          ppMemBarriers)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   uint32_t b, *dw;
+
+   struct GEN8_PIPE_CONTROL cmd = {
+      GEN8_PIPE_CONTROL_header,
+      .PostSyncOperation = NoWrite,
+   };
+
+   /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
+
+   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
+      /* This is just what PIPE_CONTROL does */
+   }
+
+   if (anv_clear_mask(&srcStageMask,
+                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
+                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
+                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
+      cmd.StallAtPixelScoreboard = true;
+   }
+
+
+   if (anv_clear_mask(&srcStageMask,
+                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                      VK_PIPELINE_STAGE_TRANSFER_BIT |
+                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
+      cmd.CommandStreamerStallEnable = true;
+   }
+
+   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
+      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
+   }
+
+   /* On our hardware, all stages will wait for execution as needed. */
+   (void)destStageMask;
+
+   /* We checked all known VkPipeEventFlags. */
+   anv_assert(srcStageMask == 0);
+
+   /* XXX: Right now, we're really dumb and just flush whatever categories
+    * the app asks for. One of these days we may make this a bit better
+    * but right now that's all the hardware allows for in most areas.
+    */
+   VkMemoryOutputFlags out_flags = 0;
+   VkMemoryInputFlags in_flags = 0;
+
+   for (uint32_t i = 0; i < memBarrierCount; i++) {
+      const struct anv_common *common = ppMemBarriers[i];
+      switch (common->sType) {
+      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
+         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
+         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
+         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
+         out_flags |= barrier->outputMask;
+         in_flags |= barrier->inputMask;
+         break;
+      }
+      default:
+         unreachable("Invalid memory barrier type");
+      }
+   }
+
+   for_each_bit(b, out_flags) {
+      switch ((VkMemoryOutputFlags)(1 << b)) {
+      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
+         break; /* FIXME: Little-core systems */
+      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
+         cmd.DCFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
+         cmd.RenderTargetCacheFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+         cmd.DepthCacheFlushEnable = true;
+         break;
+      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
+         cmd.RenderTargetCacheFlushEnable = true;
+         cmd.DepthCacheFlushEnable = true;
+         break;
+      default:
+         unreachable("Invalid memory output flag");
+      }
+   }
+
+   /* Invalidations are keyed off the accumulated input masks, so the
+    * VkMemoryInputFlags cases below must be decoded from in_flags, not
+    * from out_flags.
+    */
+   for_each_bit(b, in_flags) {
+      switch ((VkMemoryInputFlags)(1 << b)) {
+      case VK_MEMORY_INPUT_HOST_READ_BIT:
+         break; /* FIXME: Little-core systems */
+      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
+      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
+      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
+         cmd.VFCacheInvalidationEnable = true;
+         break;
+      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
+         cmd.ConstantCacheInvalidationEnable = true;
+         /* fallthrough */
+      case VK_MEMORY_INPUT_SHADER_READ_BIT:
+         cmd.DCFlushEnable = true;
+         cmd.TextureCacheInvalidationEnable = true;
+         break;
+      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
+      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
+         break; /* XXX: Hunh? */
+      case VK_MEMORY_INPUT_TRANSFER_BIT:
+         cmd.TextureCacheInvalidationEnable = true;
+         break;
+      }
+   }
+
+   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
+   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
+}
+
+void anv_CmdPushConstants(
+    VkCmdBuffer                                 cmdBuffer,
+    VkPipelineLayout                            layout,
+    VkShaderStageFlags                          stageFlags,
+    uint32_t                                    start,
+    uint32_t                                    length,
+    const void*                                 values)
+{
+   stub();
+}
+
+static void
+anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_subpass *subpass = cmd_buffer->state.subpass;
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+   const struct anv_depth_stencil_view *view;
+
+   static const struct anv_depth_stencil_view null_view =
+      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
+
+   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
+      const struct anv_attachment_view *aview =
+         fb->attachments[subpass->depth_stencil_attachment];
+      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
+      view = (const struct anv_depth_stencil_view *)aview;
+   } else {
+      view = &null_view;
+   }
+
+   /* FIXME: Implement the PMA stall W/A */
+   /* FIXME: Width and Height are wrong */
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
+                  .SurfaceType = SURFTYPE_2D,
+                  .DepthWriteEnable = view->depth_stride > 0,
+                  .StencilWriteEnable = view->stencil_stride > 0,
+                  .HierarchicalDepthBufferEnable = false,
+                  .SurfaceFormat = view->depth_format,
+                  .SurfacePitch = view->depth_stride > 0 ?
view->depth_stride - 1 : 0,
+                  .SurfaceBaseAddress = { view->bo, view->depth_offset },
+                  .Height = cmd_buffer->state.framebuffer->height - 1,
+                  .Width = cmd_buffer->state.framebuffer->width - 1,
+                  .LOD = 0,
+                  .Depth = 1 - 1,
+                  .MinimumArrayElement = 0,
+                  .DepthBufferObjectControlState = GEN8_MOCS,
+                  .RenderTargetViewExtent = 1 - 1,
+                  .SurfaceQPitch = view->depth_qpitch >> 2);
+
+   /* Disable hierarchical depth buffers. */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
+                  .StencilBufferEnable = view->stencil_stride > 0,
+                  .StencilBufferObjectControlState = GEN8_MOCS,
+                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
+                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
+                  .SurfaceQPitch = view->stencil_qpitch >> 2);
+
+   /* Clear the clear params. */
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
+}
+/* Make `subpass` the command buffer's current subpass.
+ *
+ * Records the subpass in cmd_buffer->state, marks the fragment stage's
+ * descriptors dirty, and emits the depth/stencil buffer state through
+ * anv_cmd_buffer_emit_depth_stencil().
+ *
+ * NOTE(review): the fragment descriptors are presumably dirtied because the
+ * binding table also carries the subpass's color attachments -- confirm
+ * against cmd_buffer_emit_binding_table().
+ */
+void
+anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_subpass *subpass)
+{
+   cmd_buffer->state.subpass = subpass;
+
+   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+
+   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
+}
+
+void anv_CmdBeginRenderPass(
+    VkCmdBuffer                                 cmdBuffer,
+    const VkRenderPassBeginInfo*                pRenderPassBegin,
+    VkRenderPassContents                        contents)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
+   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+
+   cmd_buffer->state.framebuffer = framebuffer;
+   cmd_buffer->state.pass = pass;
+
+   const VkRect2D *render_area = &pRenderPassBegin->renderArea;
+
+   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
+                  .ClippedDrawingRectangleYMin = render_area->offset.y,
+                  .ClippedDrawingRectangleXMin = render_area->offset.x,
+                  .ClippedDrawingRectangleYMax =
+                     render_area->offset.y + render_area->extent.height - 1, +
.ClippedDrawingRectangleXMax = + render_area->offset.x + render_area->extent.width - 1, + .DrawingRectangleOriginY = 0, + .DrawingRectangleOriginX = 0); + + anv_cmd_buffer_clear_attachments(cmd_buffer, pass, + pRenderPassBegin->pAttachmentClearValues); + + anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses); +} + +void anv_CmdNextSubpass( + VkCmdBuffer cmdBuffer, + VkRenderPassContents contents) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1); +} + +void anv_CmdEndRenderPass( + VkCmdBuffer cmdBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + + /* Emit a flushing pipe control at the end of a pass. This is kind of a + * hack but it ensures that render targets always actually get written. + * Eventually, we should do flushing based on image format transitions + * or something of that nature. + */ + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .PostSyncOperation = NoWrite, + .RenderTargetCacheFlushEnable = true, + .InstructionCacheInvalidateEnable = true, + .DepthCacheFlushEnable = true, + .VFCacheInvalidationEnable = true, + .TextureCacheInvalidationEnable = true, + .CommandStreamerStallEnable = true); +} + +void anv_CmdExecuteCommands( + VkCmdBuffer cmdBuffer, + uint32_t cmdBuffersCount, + const VkCmdBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer); + + assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]); + + for (uint32_t i = 0; i < cmdBuffersCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY); + + anv_cmd_buffer_add_secondary(primary, secondary); + } +} + +VkResult anv_CreateCommandPool( + VkDevice _device, + const VkCmdPoolCreateInfo* pCreateInfo, + VkCmdPool* pCmdPool) +{ + 
ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_cmd_pool *pool; + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + list_inithead(&pool->cmd_buffers); + + *pCmdPool = anv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +VkResult anv_DestroyCommandPool( + VkDevice _device, + VkCmdPool cmdPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + + anv_ResetCommandPool(_device, cmdPool, 0); + + anv_device_free(device, pool); + + return VK_SUCCESS; +} + +VkResult anv_ResetCommandPool( + VkDevice device, + VkCmdPool cmdPool, + VkCmdPoolResetFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool); + + list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer, + &pool->cmd_buffers, pool_link) { + anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer)); + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp new file mode 100644 index 00000000000..258abfb52be --- /dev/null +++ b/src/vulkan/anv_compiler.cpp @@ -0,0 +1,1204 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +#include <brw_context.h> +#include <brw_wm.h> /* brw_new_shader_program is here */ +#include <brw_nir.h> + +#include <brw_vs.h> +#include <brw_gs.h> +#include <brw_cs.h> + +#include <mesa/main/shaderobj.h> +#include <mesa/main/fbobject.h> +#include <mesa/main/context.h> +#include <mesa/program/program.h> +#include <glsl/program.h> + +/* XXX: We need this to keep symbols in nir.h from conflicting with the + * generated GEN command packing headers. We need to fix *both* to not + * define something as generic as LOAD. + */ +#undef LOAD + +#include <glsl/nir/nir_spirv.h> + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + uint32_t bias, count, k, *map; + struct anv_pipeline_layout *layout = pipeline->layout; + + /* No layout is valid for shaders that don't bind any resources. 
*/ + if (pipeline->layout == NULL) + return VK_SUCCESS; + + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + count = layout->stage[stage].surface_count; + prog_data->map_entries = + (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); + if (prog_data->map_entries == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + k = bias; + map = prog_data->map_entries; + for (uint32_t i = 0; i < layout->num_sets; i++) { + prog_data->bind_map[i].index = map; + for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) + *map++ = k++; + + prog_data->bind_map[i].index_count = + layout->set[i].layout->stage[stage].surface_count; + } + + return VK_SUCCESS; +} + +static uint32_t +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. 
+ */ + key->base.program_string_id = vp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + vp->program.Base.UsesClipDistanceOut); + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, + &key->base.tex); +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (vs) { + /* We add padding around uniform values below vec4 size, with the worst + * case being a float value that gets blown up to a vec4, so be + * conservative here. + */ + param_count = vs->num_uniform_components * 4; + + } else { + param_count = vp->program.Base.Parameters->NumParameters * 4; + } + /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip + * planes as uniforms. 
+ */ + param_count += key->base.nr_userclip_plane_consts * 4; + + /* Setting nr_params here NOT to the size of the param and pull_param + * arrays, but to the number of uniform components vec4_visitor + * needs. vec4_visitor::setup_uniforms() will set it back to a proper value. + */ + stage_prog_data->nr_params = ALIGN(param_count, 4) / 4; + if (vs) { + stage_prog_data->nr_params += vs->num_samplers; + } + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (key->copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (int i = 0; i < 8; i++) { + if (key->point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. 
+ */ + if (key->base.userclip_active) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data->base.vue_map, outputs_written); +\ + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. + */ + program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program, + prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + pipeline->vs_simd8 = upload_kernel(pipeline, program, program_size); + + ralloc_free(mem_ctx); + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_program *prog = (struct gl_program *) brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. 
*/ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = 
ctx->Color._ClampFragmentColor; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. 
*/ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. 
+ */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count; + if (fs) { + param_count = fs->num_uniform_components; + } else { + param_count = fp->program.Base.Parameters->NumParameters * 4; + } + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. + */ + prog_data->binding_table.render_target_start = 0; + + program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + uint32_t offset = upload_kernel(pipeline, program, program_size); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_gs_populate_key(struct brw_context *brw, + struct anv_pipeline *pipeline, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + + key->base.program_string_id = gp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + gp->program.Base.UsesClipDistanceOut); + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key->base.tex); + + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + + /* BRW_NEW_VUE_MAP_VS */ + key->input_varyings 
= prog_data->base.vue_map.slots_valid; +} + +static bool +really_do_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct brw_gs_compile_output output; + + /* FIXME: We pass the bind map to the compile in the output struct. Need + * something better. */ + set_binding_table_layout(&output.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + brw_compile_gs_prog(brw, prog, gp, key, &output); + + pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size); + pipeline->gs_vertex_count = gp->program.VerticesIn; + + ralloc_free(output.mem_ctx); + + return true; +} + +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(prog_data, 0, sizeof(*prog_data)); + + set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count = cs->num_uniform_components; + + /* The backend also sometimes adds params for texture size. 
*/ + param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; + prog_data->base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data->base.nr_params = param_count; + + program = brw_cs_emit(brw, mem_ctx, key, prog_data, + &cp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + pipeline->cs_simd = upload_kernel(pipeline, program, program_size); + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + +static void +fail_on_compile_error(int status, const char *msg) +{ + int source, line, column; + char error[256]; + + if (status) + return; + + if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4) + fail_if(!status, "%d:%s\n", line, error); + else + fail_if(!status, "%s\n", msg); +} + +struct anv_compiler { + struct anv_device *device; + struct intel_screen *screen; + struct brw_context *brw; + struct gl_pipeline_object pipeline; +}; + +extern "C" { + +struct anv_compiler * +anv_compiler_create(struct anv_device *device) +{ + const struct brw_device_info *devinfo = &device->info; + struct anv_compiler *compiler; + struct gl_context *ctx; + + compiler = rzalloc(NULL, struct anv_compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = rzalloc(compiler, struct intel_screen); + if (compiler->screen == NULL) + goto fail; + + compiler->brw = rzalloc(compiler, struct brw_context); + if (compiler->brw == NULL) + goto fail; + + compiler->device = device; + + compiler->brw->optionCache.info = NULL; + compiler->brw->bufmgr = NULL; + 
compiler->brw->gen = devinfo->gen; + compiler->brw->is_g4x = devinfo->is_g4x; + compiler->brw->is_baytrail = devinfo->is_baytrail; + compiler->brw->is_haswell = devinfo->is_haswell; + compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. */ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + + compiler->brw->intelScreen = compiler->screen; + compiler->screen->devinfo = &device->info; + + brw_process_intel_debug_variable(compiler->screen); + + compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + + ctx = &compiler->brw->ctx; + _mesa_init_shader_object_functions(&ctx->Driver); + + _mesa_init_constants(&ctx->Const, API_OPENGL_CORE); + + brw_initialize_context_constants(compiler->brw); + + intelInitExtensions(ctx); + + /* Set dd::NewShader */ + brwInitFragProgFuncs(&ctx->Driver); + + ctx->_Shader = &compiler->pipeline; + + compiler->brw->precompile = false; + + return compiler; + + fail: + ralloc_free(compiler); + return NULL; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + _mesa_free_errors_data(&compiler->brw->ctx); + ralloc_free(compiler); +} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int 
gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->vs_simd8 != NO_KERNEL; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). 
+ */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + gl_shader_stage stage; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, + { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, + { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, + { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, + { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, + { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, +}; + +struct spirv_header{ + uint32_t magic; + uint32_t version; + uint32_t gen_magic; +}; + +static const char * +src_as_glsl(const char *data) +{ + const struct spirv_header *as_spirv = (const struct spirv_header *)data; + + /* Check alignment */ + if ((intptr_t)data & 0x3) { + return data; 
+ } + + if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) { + /* LunarG back-door */ + if (as_spirv->version == 0) + return data + 12; + else + return NULL; + } else { + return data; + } +} + +static void +anv_compile_shader_glsl(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct gl_shader *shader; + int name = 0; + + shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name); + + shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data)); + _mesa_glsl_compile_shader(&brw->ctx, shader, false, false); + fail_on_compile_error(shader->CompileStatus, shader->InfoLog); + + program->Shaders[program->NumShaders] = shader; + program->NumShaders++; +} + +static void +setup_nir_io(struct gl_program *prog, + nir_shader *shader) +{ + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + prog->InputsRead |= BITFIELD64_BIT(var->data.location); + } + + foreach_list_typed(nir_variable, var, node, &shader->outputs) { + prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); + } +} + +static void +anv_compile_shader_spirv(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct anv_shader *shader = pipeline->shaders[stage]; + struct gl_shader *mesa_shader; + int name = 0; + + mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(mesa_shader == NULL, + "failed to create %s shader\n", stage_info[stage].name); + + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base; + break; + case VK_SHADER_STAGE_GEOMETRY: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base; + break; + case 
VK_SHADER_STAGE_FRAGMENT: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base; + break; + case VK_SHADER_STAGE_COMPUTE: + mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base; + break; + } + + mesa_shader->Program->Parameters = + rzalloc(mesa_shader, struct gl_program_parameter_list); + + mesa_shader->Type = stage_info[stage].token; + mesa_shader->Stage = stage_info[stage].stage; + + assert(shader->module->size % 4 == 0); + + struct gl_shader_compiler_options *glsl_options = + &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + + mesa_shader->Program->nir = + spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4, + glsl_options->NirOptions); + nir_validate_shader(mesa_shader->Program->nir); + + brw_process_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, + NULL, mesa_shader->Stage, false); + + setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir); + + fail_if(mesa_shader->Program->nir == NULL, + "failed to translate SPIR-V to NIR\n"); + + program->Shaders[program->NumShaders] = mesa_shader; + program->NumShaders++; +} + +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +int +anv_compiler_run(struct 
anv_compiler *compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = brw->ctx.Driver.NewShaderProgram(name); + program->Shaders = (struct gl_shader **) + calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders == NULL, + "failed to create program\n"); + + bool all_spirv = true; + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i] == NULL) + continue; + + /* You need at least this much for "void main() { }" anyway */ + assert(pipeline->shaders[i]->module->size >= 12); + + if (src_as_glsl(pipeline->shaders[i]->module->data)) { + all_spirv = false; + break; + } + + assert(pipeline->shaders[i]->module->size % 4 == 0); + } + + if (all_spirv) { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); + } + + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; + } + } else { + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_glsl(compiler, program, pipeline, i); + } + + _mesa_glsl_link_shader(&brw->ctx, program); + fail_on_compile_error(program->LinkStatus, + program->InfoLog); + } + + bool success; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { + struct brw_vs_prog_key vs_key; + struct 
gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); + + if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + pipeline->vs_simd8 = NO_KERNEL; + } + + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + brw_gs_populate_key(brw, pipeline, bgp, &gs_key); + + success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); + + if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + pipeline->gs_vec4 = NO_KERNEL; + } + + if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); + } + + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + 
program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); + } + + /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We + * need to fix this ASAP. + */ + if (!all_spirv) + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + anv_block_pool_alloc(&device->scratch_block_pool); + + gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { + if (pipeline->prog_data[stage]) { + free(pipeline->prog_data[stage]->map_entries); + ralloc_free(pipeline->prog_data[stage]->param); + ralloc_free(pipeline->prog_data[stage]->pull_param); + } + } +} + +} diff --git a/src/vulkan/anv_device.c b/src/vulkan/anv_device.c new file mode 100644 index 00000000000..76381e615d3 --- /dev/null +++ b/src/vulkan/anv_device.c @@ -0,0 +1,2399 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "mesa/main/git_sha1.h" +#include "util/strtod.h" + +static int +anv_env_get_int(const char *name) +{ + const char *val = getenv(name); + + if (!val) + return 0; + + return strtol(val, NULL, 0); +} + +static VkResult +anv_physical_device_init(struct anv_physical_device *device, + struct anv_instance *instance, + const char *path) +{ + int fd; + + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) + return vk_error(VK_ERROR_UNAVAILABLE); + + device->instance = instance; + device->path = path; + + device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE"); + device->no_hw = false; + if (device->chipset_id) { + /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. 
*/ + device->no_hw = true; + } else { + device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID); + } + if (!device->chipset_id) + goto fail; + + device->name = brw_get_device_name(device->chipset_id); + device->info = brw_get_device_info(device->chipset_id, -1); + if (!device->info) + goto fail; + + if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC)) + goto fail; + + if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS)) + goto fail; + + close(fd); + + return VK_SUCCESS; + +fail: + close(fd); + return vk_error(VK_ERROR_UNAVAILABLE); +} + +static void *default_alloc( + void* pUserData, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return malloc(size); +} + +static void default_free( + void* pUserData, + void* pMem) +{ + free(pMem); +} + +static const VkAllocCallbacks default_alloc_callbacks = { + .pUserData = NULL, + .pfnAlloc = default_alloc, + .pfnFree = default_free +}; + +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + VkInstance* pInstance) +{ + struct anv_instance *instance; + const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks; + void *user_data = NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + + if (pCreateInfo->pAllocCb) { + alloc_callbacks = pCreateInfo->pAllocCb; + user_data = pCreateInfo->pAllocCb->pUserData; + } + instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!instance) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + instance->pAllocUserData = alloc_callbacks->pUserData; + instance->pfnAlloc = alloc_callbacks->pfnAlloc; + instance->pfnFree = alloc_callbacks->pfnFree; + instance->apiVersion = pCreateInfo->pAppInfo->apiVersion; + instance->physicalDeviceCount = 
0; + + _mesa_locale_init(); + + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); + + *pInstance = anv_instance_to_handle(instance); + + return VK_SUCCESS; +} + +VkResult anv_DestroyInstance( + VkInstance _instance) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + + VG(VALGRIND_DESTROY_MEMPOOL(instance)); + + _mesa_locale_fini(); + + instance->pfnFree(instance->pAllocUserData, instance); + + return VK_SUCCESS; +} + +static void * +anv_instance_alloc(struct anv_instance *instance, size_t size, + size_t alignment, VkSystemAllocType allocType) +{ + void *mem = instance->pfnAlloc(instance->pAllocUserData, + size, alignment, allocType); + if (mem) { + VALGRIND_MEMPOOL_ALLOC(instance, mem, size); + VALGRIND_MAKE_MEM_UNDEFINED(mem, size); + } + return mem; +} + +static void +anv_instance_free(struct anv_instance *instance, void *mem) +{ + if (mem == NULL) + return; + + VALGRIND_MEMPOOL_FREE(instance, mem); + + instance->pfnFree(instance->pAllocUserData, mem); +} + +VkResult anv_EnumeratePhysicalDevices( + VkInstance _instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); + VkResult result; + + if (instance->physicalDeviceCount == 0) { + result = anv_physical_device_init(&instance->physicalDevice, + instance, "/dev/dri/renderD128"); + if (result != VK_SUCCESS) + return result; + + instance->physicalDeviceCount = 1; + } + + /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; + * otherwise it's an inout parameter. + * + * The Vulkan spec (git aaed022) says: + * + * pPhysicalDeviceCount is a pointer to an unsigned integer variable + * that is initialized with the number of devices the application is + * prepared to receive handles to. pname:pPhysicalDevices is pointer to + * an array of at least this many VkPhysicalDevice handles [...]. 
+ * + * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices + * overwrites the contents of the variable pointed to by + * pPhysicalDeviceCount with the number of physical devices in in the + * instance; otherwise, vkEnumeratePhysicalDevices overwrites + * pPhysicalDeviceCount with the number of physical handles written to + * pPhysicalDevices. + */ + if (!pPhysicalDevices) { + *pPhysicalDeviceCount = instance->physicalDeviceCount; + } else if (*pPhysicalDeviceCount >= 1) { + pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice); + *pPhysicalDeviceCount = 1; + } else { + *pPhysicalDeviceCount = 0; + } + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceFeatures( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures* pFeatures) +{ + anv_finishme("Get correct values for PhysicalDeviceFeatures"); + + *pFeatures = (VkPhysicalDeviceFeatures) { + .robustBufferAccess = false, + .fullDrawIndexUint32 = false, + .imageCubeArray = false, + .independentBlend = false, + .geometryShader = true, + .tessellationShader = false, + .sampleRateShading = false, + .dualSourceBlend = true, + .logicOp = true, + .instancedDrawIndirect = true, + .depthClip = false, + .depthBiasClamp = false, + .fillModeNonSolid = true, + .depthBounds = false, + .wideLines = true, + .largePoints = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .textureCompressionBC = true, + .pipelineStatisticsQuery = true, + .vertexSideEffects = false, + .tessellationSideEffects = false, + .geometrySideEffects = false, + .fragmentSideEffects = false, + .shaderTessellationPointSize = false, + .shaderGeometryPointSize = true, + .shaderTextureGatherExtended = true, + .shaderStorageImageExtendedFormats = false, + .shaderStorageImageMultisample = false, + .shaderStorageBufferArrayConstantIndexing = false, + .shaderStorageImageArrayConstantIndexing = false, + .shaderUniformBufferArrayDynamicIndexing = true, + 
.shaderSampledImageArrayDynamicIndexing = false, + .shaderStorageBufferArrayDynamicIndexing = false, + .shaderStorageImageArrayDynamicIndexing = false, + .shaderClipDistance = false, + .shaderCullDistance = false, + .shaderFloat64 = false, + .shaderInt64 = false, + .shaderFloat16 = false, + .shaderInt16 = false, + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceLimits( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceLimits* pLimits) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + const struct brw_device_info *devinfo = physical_device->info; + + anv_finishme("Get correct values for PhysicalDeviceLimits"); + + *pLimits = (VkPhysicalDeviceLimits) { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 10), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 10), + .maxTexelBufferSize = (1 << 14), + .maxUniformBufferSize = UINT32_MAX, + .maxStorageBufferSize = UINT32_MAX, + .maxPushConstantsSize = 128, + .maxMemoryAllocationCount = UINT32_MAX, + .bufferImageGranularity = 64, /* A cache line */ + .maxBoundDescriptorSets = MAX_SETS, + .maxDescriptorSets = UINT32_MAX, + .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorStorageBuffers = 64, + .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorStorageImages = 64, + .maxDescriptorSetSamplers = 256, + .maxDescriptorSetUniformBuffers = 256, + .maxDescriptorSetStorageBuffers = 256, + .maxDescriptorSetSampledImages = 256, + .maxDescriptorSetStorageImages = 256, + .maxVertexInputAttributes = 32, + .maxVertexInputAttributeOffset = 256, + .maxVertexInputBindingStride = 256, + .maxVertexOutputComponents = 32, + .maxTessGenLevel = 0, + .maxTessPatchSize = 0, + .maxTessControlPerVertexInputComponents = 0, + .maxTessControlPerVertexOutputComponents = 0, + .maxTessControlPerPatchOutputComponents = 0, + .maxTessControlTotalOutputComponents = 0, + 
.maxTessEvaluationInputComponents = 0, + .maxTessEvaluationOutputComponents = 0, + .maxGeometryShaderInvocations = 6, + .maxGeometryInputComponents = 16, + .maxGeometryOutputComponents = 16, + .maxGeometryOutputVertices = 16, + .maxGeometryTotalOutputComponents = 16, + .maxFragmentInputComponents = 16, + .maxFragmentOutputBuffers = 8, + .maxFragmentDualSourceBuffers = 2, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 1024, + .maxComputeWorkGroupCount = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads, + .maxComputeWorkGroupSize = { + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + 16 * devinfo->max_cs_threads, + }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectInstanceCount = UINT32_MAX, + .primitiveRestartForPatches = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = 16, + .maxDynamicViewportStates = UINT32_MAX, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */ + .viewportSubPixelBits = 13, /* We take a float? 
*/ + .minMemoryMapAlignment = 64, /* A cache line */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 1, + .minStorageBufferOffsetAlignment = 1, + .minTexelOffset = 0, /* FIXME */ + .maxTexelOffset = 0, /* FIXME */ + .minTexelGatherOffset = 0, /* FIXME */ + .maxTexelGatherOffset = 0, /* FIXME */ + .minInterpolationOffset = 0, /* FIXME */ + .maxInterpolationOffset = 0, /* FIXME */ + .subPixelInterpolationOffsetBits = 0, /* FIXME */ + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .maxFramebufferColorSamples = 8, + .maxFramebufferDepthSamples = 8, + .maxFramebufferStencilSamples = 8, + .maxColorAttachments = MAX_RTS, + .maxSampledImageColorSamples = 8, + .maxSampledImageDepthSamples = 8, + .maxSampledImageIntegerSamples = 1, + .maxStorageImageSamples = 1, + .maxSampleMaskWords = 1, + .timestampFrequency = 1000 * 1000 * 1000 / 80, + .maxClipDistances = 0 /* FIXME */, + .maxCullDistances = 0 /* FIXME */, + .maxCombinedClipAndCullDistances = 0 /* FIXME */, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties* pProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + *pProperties = (VkPhysicalDeviceProperties) { + .apiVersion = VK_MAKE_VERSION(0, 138, 1), + .driverVersion = 1, + .vendorId = 0x8086, + .deviceId = pdevice->chipset_id, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + }; + + strcpy(pProperties->deviceName, pdevice->name); + snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH, + "anv-%s", MESA_GIT_SHA1 + 4); + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceQueueCount( + VkPhysicalDevice physicalDevice, + uint32_t* pCount) +{ + *pCount = 1; + + return VK_SUCCESS; +} + 
+VkResult anv_GetPhysicalDeviceQueueProperties( + VkPhysicalDevice physicalDevice, + uint32_t count, + VkPhysicalDeviceQueueProperties* pQueueProperties) +{ + assert(count == 1); + + *pQueueProperties = (VkPhysicalDeviceQueueProperties) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_DMA_BIT, + .queueCount = 1, + .supportsTimestamps = true, + }; + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceMemoryProperties( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties* pMemoryProperties) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + VkDeviceSize heap_size; + + /* Reserve some wiggle room for the driver by exposing only 75% of the + * aperture to the heap. + */ + heap_size = 3 * physical_device->aperture_size / 4; + + /* The property flags below are valid only for llc platforms. */ + pMemoryProperties->memoryTypeCount = 1; + pMemoryProperties->memoryTypes[0] = (VkMemoryType) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + .heapIndex = 1, + }; + + pMemoryProperties->memoryHeapCount = 1; + pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_HOST_LOCAL, + }; + + return VK_SUCCESS; +} + +PFN_vkVoidFunction anv_GetInstanceProcAddr( + VkInstance instance, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +PFN_vkVoidFunction anv_GetDeviceProcAddr( + VkDevice device, + const char* pName) +{ + return anv_lookup_entrypoint(pName); +} + +static void +parse_debug_flags(struct anv_device *device) +{ + const char *debug, *p, *end; + + debug = getenv("INTEL_DEBUG"); + device->dump_aub = false; + if (debug) { + for (p = debug; *p; p = end + 1) { + end = strchrnul(p, ','); + if (end - p == 3 && memcmp(p, "aub", 3) == 0) + device->dump_aub = true; + if (end - p == 5 && memcmp(p, "no_hw", 5) == 0) + device->no_hw = true; + if (*end == '\0') + break; + } + } +} + +static VkResult +anv_queue_init(struct anv_device *device, 
struct anv_queue *queue) +{ + queue->device = device; + queue->pool = &device->surface_state_pool; + + queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4); + if (queue->completed_serial.map == NULL) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + *(uint32_t *)queue->completed_serial.map = 0; + queue->next_serial = 1; + + return VK_SUCCESS; +} + +static void +anv_queue_finish(struct anv_queue *queue) +{ +#ifdef HAVE_VALGRIND + /* This gets torn down with the device so we only need to do this if + * valgrind is present. + */ + anv_state_pool_free(queue->pool, queue->completed_serial); +#endif +} + +static void +anv_device_init_border_colors(struct anv_device *device) +{ + static const VkClearColorValue border_colors[] = { + [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } }, + [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } }, + [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } }, + [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } }, + [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } }, + }; + + device->border_colors = + anv_state_pool_alloc(&device->dynamic_state_pool, + sizeof(border_colors), 32); + memcpy(device->border_colors.map, border_colors, sizeof(border_colors)); +} + +VkResult anv_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + VkDevice* pDevice) +{ + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + struct anv_instance *instance = physical_device->instance; + struct anv_device *device; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); + + device = anv_instance_alloc(instance, sizeof(*device), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!device) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + device->no_hw = physical_device->no_hw; + parse_debug_flags(device); + + device->instance 
= physical_device->instance; + + /* XXX(chadv): Can we dup() physicalDevice->fd here? */ + device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC); + if (device->fd == -1) + goto fail_device; + + device->context_id = anv_gem_create_context(device); + if (device->context_id == -1) + goto fail_fd; + + anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE); + + anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048); + + anv_state_pool_init(&device->dynamic_state_pool, + &device->dynamic_state_block_pool); + + anv_block_pool_init(&device->instruction_block_pool, device, 2048); + anv_block_pool_init(&device->surface_state_block_pool, device, 2048); + + anv_state_pool_init(&device->surface_state_pool, + &device->surface_state_block_pool); + + anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + + device->info = *physical_device->info; + + device->compiler = anv_compiler_create(device); + device->aub_writer = NULL; + + pthread_mutex_init(&device->mutex, NULL); + + anv_queue_init(device, &device->queue); + + anv_device_init_meta(device); + + anv_device_init_border_colors(device); + + *pDevice = anv_device_to_handle(device); + + return VK_SUCCESS; + + fail_fd: + close(device->fd); + fail_device: + anv_device_free(device, device); + + return vk_error(VK_ERROR_UNAVAILABLE); +} + +VkResult anv_DestroyDevice( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_compiler_destroy(device->compiler); + + anv_queue_finish(&device->queue); + + anv_device_finish_meta(device); + +#ifdef HAVE_VALGRIND + /* We only need to free these to prevent valgrind errors. The backing + * BO will go away in a couple of lines so we don't actually leak. 
+ */ + anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); +#endif + + anv_bo_pool_finish(&device->batch_bo_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->surface_state_pool); + anv_block_pool_finish(&device->surface_state_block_pool); + anv_block_pool_finish(&device->scratch_block_pool); + + close(device->fd); + + if (device->aub_writer) + anv_aub_writer_destroy(device->aub_writer); + + anv_instance_free(device->instance, device); + + return VK_SUCCESS; +} + +static const VkExtensionProperties global_extensions[] = { + { + .extName = "VK_WSI_LunarG", + .specVersion = 3 + } +}; + +VkResult anv_GetGlobalExtensionProperties( + const char* pLayerName, + uint32_t* pCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = ARRAY_SIZE(global_extensions); + return VK_SUCCESS; + } + + assert(*pCount < ARRAY_SIZE(global_extensions)); + + *pCount = ARRAY_SIZE(global_extensions); + memcpy(pProperties, global_extensions, sizeof(global_extensions)); + + return VK_SUCCESS; +} + +VkResult anv_GetPhysicalDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, + const char* pLayerName, + uint32_t* pCount, + VkExtensionProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_EXTENSION); +} + +VkResult anv_GetGlobalLayerProperties( + uint32_t* pCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); +} + +VkResult anv_GetPhysicalDeviceLayerProperties( + VkPhysicalDevice physicalDevice, + uint32_t* pCount, + VkLayerProperties* pProperties) +{ + if (pProperties == NULL) { + *pCount = 0; + return VK_SUCCESS; + } + + /* 
None supported at this time */ + return vk_error(VK_ERROR_INVALID_LAYER); +} + +VkResult anv_GetDeviceQueue( + VkDevice _device, + uint32_t queueNodeIndex, + uint32_t queueIndex, + VkQueue* pQueue) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(queueIndex == 0); + + *pQueue = anv_queue_to_handle(&device->queue); + + return VK_SUCCESS; +} + +VkResult anv_QueueSubmit( + VkQueue _queue, + uint32_t cmdBufferCount, + const VkCmdBuffer* pCmdBuffers, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + struct anv_device *device = queue->device; + int ret; + + for (uint32_t i = 0; i < cmdBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]); + + assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY); + + if (device->dump_aub) + anv_cmd_buffer_dump(cmd_buffer); + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); + + if (fence) { + ret = anv_gem_execbuffer(device, &fence->execbuf); + if (ret != 0) + return vk_error(VK_ERROR_UNKNOWN); + } + + for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++) + cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset; + } else { + *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial; + } + } + + return VK_SUCCESS; +} + +VkResult anv_QueueWaitIdle( + VkQueue _queue) +{ + ANV_FROM_HANDLE(anv_queue, queue, _queue); + + return vkDeviceWaitIdle(anv_device_to_handle(queue->device)); +} + +VkResult anv_DeviceWaitIdle( + VkDevice _device) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_state state; + struct anv_batch batch; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + struct anv_bo *bo = NULL; + VkResult result; + int64_t timeout; + int ret; + + state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32); + bo = 
&device->dynamic_state_pool.block_pool->bo; + batch.start = batch.next = state.map; + batch.end = state.map + 32; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + exec2_objects[0].handle = bo->gem_handle; + exec2_objects[0].relocation_count = 0; + exec2_objects[0].relocs_ptr = 0; + exec2_objects[0].alignment = 0; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = 0; + exec2_objects[0].rsvd1 = 0; + exec2_objects[0].rsvd2 = 0; + + execbuf.buffers_ptr = (uintptr_t) exec2_objects; + execbuf.buffer_count = 1; + execbuf.batch_start_offset = state.offset; + execbuf.batch_len = batch.next - state.map; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + + execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + execbuf.rsvd1 = device->context_id; + execbuf.rsvd2 = 0; + + if (!device->no_hw) { + ret = anv_gem_execbuffer(device, &execbuf); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + timeout = INT64_MAX; + ret = anv_gem_wait(device, bo->gem_handle, &timeout); + if (ret != 0) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + } + + anv_state_pool_free(&device->dynamic_state_pool, state); + + return VK_SUCCESS; + + fail: + anv_state_pool_free(&device->dynamic_state_pool, state); + + return result; +} + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType) +{ + return anv_instance_alloc(device->instance, size, alignment, allocType); +} + +void +anv_device_free(struct anv_device * device, + void * mem) +{ + anv_instance_free(device->instance, mem); +} + +VkResult +anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) +{ + bo->gem_handle = anv_gem_create(device, size); + if (!bo->gem_handle) + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + + bo->map = NULL; + bo->index = 0; + bo->offset = 0; + bo->size = size; + + 
return VK_SUCCESS; +} + +VkResult anv_AllocMemory( + VkDevice _device, + const VkMemoryAllocInfo* pAllocInfo, + VkDeviceMemory* pMem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + VkResult result; + + assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO); + + if (pAllocInfo->memoryTypeIndex != 0) { + /* We support exactly one memory heap. */ + return vk_error(VK_ERROR_INVALID_VALUE); + } + + /* FINISHME: Fail if allocation request exceeds heap size. */ + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize); + if (result != VK_SUCCESS) + goto fail; + + *pMem = anv_device_memory_to_handle(mem); + + return VK_SUCCESS; + + fail: + anv_device_free(device, mem); + + return result; +} + +VkResult anv_FreeMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + if (mem->bo.map) + anv_gem_munmap(mem->bo.map, mem->bo.size); + + if (mem->bo.gem_handle != 0) + anv_gem_close(device, mem->bo.gem_handle); + + anv_device_free(device, mem); + + return VK_SUCCESS; +} + +VkResult anv_MapMemory( + VkDevice _device, + VkDeviceMemory _mem, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void** ppData) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only + * takes a VkDeviceMemory pointer, it seems like only one map of the memory + * at a time is valid. We could just mmap up front and return an offset + * pointer here, but that may exhaust virtual memory on 32 bit + * userspace. 
*/ + + mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size); + mem->map_size = size; + + *ppData = mem->map; + + return VK_SUCCESS; +} + +VkResult anv_UnmapMemory( + VkDevice _device, + VkDeviceMemory _mem) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + + anv_gem_munmap(mem->map, mem->map_size); + + return VK_SUCCESS; +} + +VkResult anv_FlushMappedMemoryRanges( + VkDevice device, + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) +{ + /* clflush here for !llc platforms */ + + return VK_SUCCESS; +} + +VkResult anv_InvalidateMappedMemoryRanges( + VkDevice device, + uint32_t memRangeCount, + const VkMappedMemoryRange* pMemRanges) +{ + return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges); +} + +VkResult anv_GetBufferMemoryRequirements( + VkDevice device, + VkBuffer _buffer, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. + */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = buffer->size; + pMemoryRequirements->alignment = 16; + + return VK_SUCCESS; +} + +VkResult anv_GetImageMemoryRequirements( + VkDevice device, + VkImage _image, + VkMemoryRequirements* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_image, image, _image); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. The bit `1<<i` is set if and + * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties + * structure for the physical device is supported. + * + * We support exactly one memory type. 
+ */ + pMemoryRequirements->memoryTypeBits = 1; + + pMemoryRequirements->size = image->size; + pMemoryRequirements->alignment = image->alignment; + + return VK_SUCCESS; +} + +VkResult anv_GetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t* pNumRequirements, + VkSparseImageMemoryRequirements* pSparseMemoryRequirements) +{ + return vk_error(VK_UNSUPPORTED); +} + +VkResult anv_GetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize* pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; + stub_return(VK_SUCCESS); +} + +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer _buffer, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + buffer->bo = &mem->bo; + buffer->offset = memOffset; + + return VK_SUCCESS; +} + +VkResult anv_BindImageMemory( + VkDevice device, + VkImage _image, + VkDeviceMemory _mem, + VkDeviceSize memOffset) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, _mem); + ANV_FROM_HANDLE(anv_image, image, _image); + + image->bo = &mem->bo; + image->offset = memOffset; + + return VK_SUCCESS; +} + +VkResult anv_QueueBindSparseBufferMemory( + VkQueue queue, + VkBuffer buffer, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueBindSparseImageOpaqueMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueBindSparseImageMemory( + VkQueue queue, + VkImage image, + uint32_t numBindings, + const VkSparseImageMemoryBindInfo* pBindInfo) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_CreateFence( + VkDevice _device, + const VkFenceCreateInfo* pCreateInfo, + VkFence* pFence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_fence *fence; + struct anv_batch batch; + VkResult result; + + const uint32_t 
fence_size = 128; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); + + fence = anv_device_alloc(device, sizeof(*fence), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (fence == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_init_new(&fence->bo, device, fence_size); + if (result != VK_SUCCESS) + goto fail; + + fence->bo.map = + anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size); + batch.next = batch.start = fence->bo.map; + batch.end = fence->bo.map + fence->bo.size; + anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END); + anv_batch_emit(&batch, GEN8_MI_NOOP); + + fence->exec2_objects[0].handle = fence->bo.gem_handle; + fence->exec2_objects[0].relocation_count = 0; + fence->exec2_objects[0].relocs_ptr = 0; + fence->exec2_objects[0].alignment = 0; + fence->exec2_objects[0].offset = fence->bo.offset; + fence->exec2_objects[0].flags = 0; + fence->exec2_objects[0].rsvd1 = 0; + fence->exec2_objects[0].rsvd2 = 0; + + fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects; + fence->execbuf.buffer_count = 1; + fence->execbuf.batch_start_offset = 0; + fence->execbuf.batch_len = batch.next - fence->bo.map; + fence->execbuf.cliprects_ptr = 0; + fence->execbuf.num_cliprects = 0; + fence->execbuf.DR1 = 0; + fence->execbuf.DR4 = 0; + + fence->execbuf.flags = + I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; + fence->execbuf.rsvd1 = device->context_id; + fence->execbuf.rsvd2 = 0; + + *pFence = anv_fence_to_handle(fence); + + return VK_SUCCESS; + + fail: + anv_device_free(device, fence); + + return result; +} + +VkResult anv_DestroyFence( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + + anv_gem_munmap(fence->bo.map, fence->bo.size); + anv_gem_close(device, fence->bo.gem_handle); + anv_device_free(device, fence); + + return VK_SUCCESS; +} + +VkResult anv_ResetFences( + VkDevice _device, + uint32_t fenceCount, + const 
VkFence* pFences) +{ + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + fence->ready = false; + } + + return VK_SUCCESS; +} + +VkResult anv_GetFenceStatus( + VkDevice _device, + VkFence _fence) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_fence, fence, _fence); + int64_t t = 0; + int ret; + + if (fence->ready) + return VK_SUCCESS; + + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == 0) { + fence->ready = true; + return VK_SUCCESS; + } + + return VK_NOT_READY; +} + +VkResult anv_WaitForFences( + VkDevice _device, + uint32_t fenceCount, + const VkFence* pFences, + VkBool32 waitAll, + uint64_t timeout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + int64_t t = timeout; + int ret; + + /* FIXME: handle !waitAll */ + + for (uint32_t i = 0; i < fenceCount; i++) { + ANV_FROM_HANDLE(anv_fence, fence, pFences[i]); + ret = anv_gem_wait(device, fence->bo.gem_handle, &t); + if (ret == -1 && errno == ETIME) + return VK_TIMEOUT; + else if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + return VK_SUCCESS; +} + +// Queue semaphore functions + +VkResult anv_CreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo* pCreateInfo, + VkSemaphore* pSemaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_DestroySemaphore( + VkDevice device, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueSignalSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_QueueWaitSemaphore( + VkQueue queue, + VkSemaphore semaphore) +{ + stub_return(VK_UNSUPPORTED); +} + +// Event functions + +VkResult anv_CreateEvent( + VkDevice device, + const VkEventCreateInfo* pCreateInfo, + VkEvent* pEvent) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_DestroyEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_GetEventStatus( + VkDevice device, + VkEvent event) +{ + 
stub_return(VK_UNSUPPORTED); +} + +VkResult anv_SetEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_ResetEvent( + VkDevice device, + VkEvent event) +{ + stub_return(VK_UNSUPPORTED); +} + +// Buffer functions + +VkResult anv_CreateBuffer( + VkDevice _device, + const VkBufferCreateInfo* pCreateInfo, + VkBuffer* pBuffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_buffer *buffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); + + buffer = anv_device_alloc(device, sizeof(*buffer), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (buffer == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + buffer->size = pCreateInfo->size; + buffer->bo = NULL; + buffer->offset = 0; + + *pBuffer = anv_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +VkResult anv_DestroyBuffer( + VkDevice _device, + VkBuffer _buffer) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + anv_device_free(device, buffer); + + return VK_SUCCESS; +} + +// Buffer view functions + +void +anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range) +{ + const struct anv_format *info; + + info = anv_format_for_vk_format(format); + /* This assumes RGBA float format. 
*/ + uint32_t stride = 4; + uint32_t num_elements = range / stride; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_BUFFER, + .SurfaceArray = false, + .SurfaceFormat = info->surface_format, + .SurfaceVerticalAlignment = VALIGN4, + .SurfaceHorizontalAlignment = HALIGN4, + .TileMode = LINEAR, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + .BaseMipLevel = 0.0, + .SurfaceQPitch = 0, + .Height = (num_elements >> 7) & 0x3fff, + .Width = num_elements & 0x7f, + .Depth = (num_elements >> 21) & 0x3f, + .SurfacePitch = stride - 1, + .MinimumArrayElement = 0, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + .SurfaceMinLOD = 0, + .MIPCountLOD = 0, + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + /* FIXME: We assume that the image must be bound at this time. 
*/ + .SurfaceBaseAddress = { NULL, offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state); +} + +/* Create a buffer view: allocate the anv_buffer_view, point its embedded + * surface view at the buffer's BO and offset, then allocate a 64-byte + * surface state and fill it for the requested format and range. + * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the host allocation fails. + */ +VkResult anv_CreateBufferView( + VkDevice _device, + const VkBufferViewCreateInfo* pCreateInfo, + VkBufferView* pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer); + struct anv_buffer_view *bview; + struct anv_surface_view *view; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO); + + /* Size the allocation by the object that is handed back and later freed + * (bview), not by the surface view embedded inside it. + */ + bview = anv_device_alloc(device, sizeof(*bview), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (bview == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + view = &bview->view; + view->bo = buffer->bo; + view->offset = buffer->offset + pCreateInfo->offset; + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + view->format = pCreateInfo->format; + view->range = pCreateInfo->range; + + anv_fill_buffer_surface_state(view->surface_state.map, + pCreateInfo->format, + view->offset, pCreateInfo->range); + + *pView = anv_buffer_view_to_handle(bview); + + return VK_SUCCESS; +} + +VkResult anv_DestroyBufferView( + VkDevice _device, + VkBufferView _bview) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_buffer_view, bview, _bview); + + anv_surface_view_fini(device, &bview->view); + anv_device_free(device, bview); + + return VK_SUCCESS; +} + +// Sampler functions + +VkResult anv_CreateSampler( + VkDevice _device, + const VkSamplerCreateInfo* pCreateInfo, + VkSampler* pSampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_sampler *sampler; + uint32_t mag_filter, min_filter, max_anisotropy; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = anv_device_alloc(device, sizeof(*sampler), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!sampler) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + static const uint32_t vk_to_gen_tex_filter[] = { + [VK_TEX_FILTER_NEAREST] = 
MAPFILTER_NEAREST, + [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_mipmap_mode[] = { + [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE, + [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, + [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR + }; + + static const uint32_t vk_to_gen_tex_address[] = { + [VK_TEX_ADDRESS_WRAP] = TCM_WRAP, + [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR, + [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP, + [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE, + [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER, + }; + + static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER, + [VK_COMPARE_OP_LESS] = PREFILTEROPLESS, + [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL, + [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER, + [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL, + [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS, + }; + + if (pCreateInfo->maxAnisotropy > 1) { + mag_filter = MAPFILTER_ANISOTROPIC; + min_filter = MAPFILTER_ANISOTROPIC; + max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2; + } else { + mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter]; + min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter]; + max_anisotropy = RATIO21; + } + + struct GEN8_SAMPLER_STATE sampler_state = { + .SamplerDisable = false, + .TextureBorderColorMode = DX10OGL, + .LODPreClampMode = 0, + .BaseMipLevel = 0.0, + .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode], + .MagModeFilter = mag_filter, + .MinModeFilter = min_filter, + .TextureLODBias = pCreateInfo->mipLodBias * 256, + .AnisotropicAlgorithm = EWAApproximation, + .MinLOD = pCreateInfo->minLod, + .MaxLOD = pCreateInfo->maxLod, + .ChromaKeyEnable = 0, + .ChromaKeyIndex = 0, + .ChromaKeyMode = 0, + .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp], + .CubeSurfaceControlMode = 0, + + .IndirectStatePointer = + device->border_colors.offset + + 
pCreateInfo->borderColor * sizeof(float) * 4, + + .LODClampMagnificationMode = MIPNONE, + .MaximumAnisotropy = max_anisotropy, + .RAddressMinFilterRoundingEnable = 0, + .RAddressMagFilterRoundingEnable = 0, + .VAddressMinFilterRoundingEnable = 0, + .VAddressMagFilterRoundingEnable = 0, + .UAddressMinFilterRoundingEnable = 0, + .UAddressMagFilterRoundingEnable = 0, + .TrilinearFilterQuality = 0, + .NonnormalizedCoordinateEnable = 0, + .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU], + .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV], + .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW], + }; + + GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state); + + *pSampler = anv_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +VkResult anv_DestroySampler( + VkDevice _device, + VkSampler _sampler) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_sampler, sampler, _sampler); + + anv_device_free(device, sampler); + + return VK_SUCCESS; +} + +// Descriptor set functions + +VkResult anv_CreateDescriptorSetLayout( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo* pCreateInfo, + VkDescriptorSetLayout* pSetLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_descriptor_set_layout *set_layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); + + uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t num_dynamic_buffers = 0; + uint32_t count = 0; + uint32_t stages = 0; + uint32_t s; + + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + sampler_count[s] += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + switch 
(pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + surface_count[s] += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize; + break; + default: + break; + } + + stages |= pCreateInfo->pBinding[i].stageFlags; + count += pCreateInfo->pBinding[i].arraySize; + } + + uint32_t sampler_total = 0; + uint32_t surface_total = 0; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + sampler_total += sampler_count[s]; + surface_total += surface_count[s]; + } + + size_t size = sizeof(*set_layout) + + (sampler_total + surface_total) * sizeof(set_layout->entries[0]); + set_layout = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set_layout) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + set_layout->num_dynamic_buffers = num_dynamic_buffers; + set_layout->count = count; + set_layout->shader_stages = stages; + + struct anv_descriptor_slot *p = set_layout->entries; + struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM]; + struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM]; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + set_layout->stage[s].surface_count = surface_count[s]; + set_layout->stage[s].surface_start = surface[s] = p; + p += surface_count[s]; + set_layout->stage[s].sampler_count = 
sampler_count[s]; + set_layout->stage[s].sampler_start = sampler[s] = p; + p += sampler_count[s]; + } + + uint32_t descriptor = 0; + int8_t dynamic_slot = 0; + bool is_dynamic; + for (uint32_t i = 0; i < pCreateInfo->count; i++) { + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { + sampler[s]->index = descriptor + j; + sampler[s]->dynamic_slot = -1; + sampler[s]++; + } + break; + default: + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + is_dynamic = true; + break; + default: + is_dynamic = false; + break; + } + + switch (pCreateInfo->pBinding[i].descriptorType) { + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for_each_bit(s, pCreateInfo->pBinding[i].stageFlags) + for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) { + surface[s]->index = descriptor + j; + if (is_dynamic) + surface[s]->dynamic_slot = dynamic_slot + j; + else + surface[s]->dynamic_slot = -1; + surface[s]++; + } + break; + default: + break; + } + + if (is_dynamic) + dynamic_slot += pCreateInfo->pBinding[i].arraySize; + + descriptor += pCreateInfo->pBinding[i].arraySize; + } + + *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDescriptorSetLayout( + VkDevice _device, + VkDescriptorSetLayout 
_set_layout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout); + + anv_device_free(device, set_layout); + + return VK_SUCCESS; +} + +VkResult anv_CreateDescriptorPool( + VkDevice device, + VkDescriptorPoolUsage poolUsage, + uint32_t maxSets, + const VkDescriptorPoolCreateInfo* pCreateInfo, + VkDescriptorPool* pDescriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub"); + pDescriptorPool->handle = 1; + return VK_SUCCESS; +} + +VkResult anv_DestroyDescriptorPool( + VkDevice _device, + VkDescriptorPool _pool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult anv_ResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool) +{ + anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets"); + return VK_SUCCESS; +} + +VkResult +anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set) +{ + struct anv_descriptor_set *set; + size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]); + + set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!set) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* A descriptor set may not be 100% filled. Clear the set so we can can + * later detect holes in it. 
+ */ + memset(set, 0, size); + + *out_set = set; + + return VK_SUCCESS; +} + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set) +{ + anv_device_free(device, set); +} + +VkResult anv_AllocDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + VkDescriptorSetUsage setUsage, + uint32_t count, + const VkDescriptorSetLayout* pSetLayouts, + VkDescriptorSet* pDescriptorSets, + uint32_t* pCount) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + VkResult result; + struct anv_descriptor_set *set; + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]); + + result = anv_descriptor_set_create(device, layout, &set); + if (result != VK_SUCCESS) { + *pCount = i; + return result; + } + + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); + } + + *pCount = count; + + return VK_SUCCESS; +} + +VkResult anv_FreeDescriptorSets( + VkDevice _device, + VkDescriptorPool descriptorPool, + uint32_t count, + const VkDescriptorSet* pDescriptorSets) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + for (uint32_t i = 0; i < count; i++) { + ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]); + + anv_descriptor_set_destroy(device, set); + } + + return VK_SUCCESS; +} + +VkResult anv_UpdateDescriptorSets( + VkDevice device, + uint32_t writeCount, + const VkWriteDescriptorSet* pDescriptorWrites, + uint32_t copyCount, + const VkCopyDescriptorSet* pDescriptorCopies) +{ + for (uint32_t i = 0; i < writeCount; i++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; + ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet); + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + for (uint32_t j = 0; j < write->count; j++) { + set->descriptors[write->destBinding + j].sampler = + anv_sampler_from_handle(write->pDescriptors[j].sampler); + } + + if (write->descriptorType == 
VK_DESCRIPTOR_TYPE_SAMPLER) + break; + + /* fallthrough */ + + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_image_view, iview, + write->pDescriptors[j].imageView); + set->descriptors[write->destBinding + j].view = &iview->view; + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + anv_finishme("texel buffers not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + anv_finishme("input attachments not implemented"); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->count; j++) { + ANV_FROM_HANDLE(anv_buffer_view, bview, + write->pDescriptors[j].bufferView); + set->descriptors[write->destBinding + j].view = &bview->view; + } + break; + + default: + break; + } + } + + for (uint32_t i = 0; i < copyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + /* The source descriptors live in srcSet. Resolving both handles from + * destSet made every copy read its descriptors from the destination + * set instead of the source set. + */ + ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet); + ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet); + for (uint32_t j = 0; j < copy->count; j++) { + dest->descriptors[copy->destBinding + j] = + src->descriptors[copy->srcBinding + j]; + } + } + + return VK_SUCCESS; +} + +// State object functions + +static inline int64_t +clamp_int64(int64_t x, int64_t min, int64_t max) +{ + if (x < min) + return min; + else if (x < max) + return x; + else + return max; +} + +VkResult anv_CreateDynamicViewportState( + VkDevice _device, + const VkDynamicViewportStateCreateInfo* pCreateInfo, + VkDynamicViewportState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_vp_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + 
VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + unsigned count = pCreateInfo->viewportAndScissorCount; + state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool, + count * 64, 64); + state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool, + count * 8, 32); + state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool, + count * 32, 32); + + for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) { + const VkViewport *vp = &pCreateInfo->pViewports[i]; + const VkRect2D *s = &pCreateInfo->pScissors[i]; + + struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = { + .ViewportMatrixElementm00 = vp->width / 2, + .ViewportMatrixElementm11 = vp->height / 2, + .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2, + .ViewportMatrixElementm30 = vp->originX + vp->width / 2, + .ViewportMatrixElementm31 = vp->originY + vp->height / 2, + .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2, + .XMinClipGuardband = -1.0f, + .XMaxClipGuardband = 1.0f, + .YMinClipGuardband = -1.0f, + .YMaxClipGuardband = 1.0f, + .XMinViewPort = vp->originX, + .XMaxViewPort = vp->originX + vp->width - 1, + .YMinViewPort = vp->originY, + .YMaxViewPort = vp->originY + vp->height - 1, + }; + + struct GEN8_CC_VIEWPORT cc_viewport = { + .MinimumDepth = vp->minDepth, + .MaximumDepth = vp->maxDepth + }; + + /* Since xmax and ymax are inclusive, we have to have xmax < xmin or + * ymax < ymin for empty clips. In case clip x, y, width height are all + * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't + * what we want. Just special case empty clips and produce a canonical + * empty clip. 
*/ + static const struct GEN8_SCISSOR_RECT empty_scissor = { + .ScissorRectangleYMin = 1, + .ScissorRectangleXMin = 1, + .ScissorRectangleYMax = 0, + .ScissorRectangleXMax = 0 + }; + + const int max = 0xffff; + struct GEN8_SCISSOR_RECT scissor = { + /* Do this math using int64_t so overflow gets clamped correctly. */ + .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), + .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + }; + + GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport); + /* cc_vp is allocated as count * 8 bytes in this function, so its entries + * are 8 bytes apart. A 32-byte stride writes past the end of the + * allocation for every viewport after the first. + */ + GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 8, &cc_viewport); + + if (s->extent.width <= 0 || s->extent.height <= 0) { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor); + } else { + GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor); + } + } + + *pState = anv_dynamic_vp_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicViewportState( + VkDevice _device, + VkDynamicViewportState _vp_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state); + + anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp); + anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor); + + anv_device_free(device, vp_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicRasterState( + VkDevice _device, + const VkDynamicRasterStateCreateInfo* pCreateInfo, + VkDynamicRasterState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_rs_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + 
VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .LineWidth = pCreateInfo->lineWidth, + }; + + GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf); + + bool enable_bias = pCreateInfo->depthBias != 0.0f || + pCreateInfo->slopeScaledDepthBias != 0.0f; + struct GEN8_3DSTATE_RASTER raster = { + .GlobalDepthOffsetEnableSolid = enable_bias, + .GlobalDepthOffsetEnableWireframe = enable_bias, + .GlobalDepthOffsetEnablePoint = enable_bias, + .GlobalDepthOffsetConstant = pCreateInfo->depthBias, + .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias, + .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp + }; + + GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster); + + *pState = anv_dynamic_rs_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicRasterState( + VkDevice _device, + VkDynamicRasterState _rs_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state); + + anv_device_free(device, rs_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicColorBlendState( + VkDevice _device, + const VkDynamicColorBlendStateCreateInfo* pCreateInfo, + VkDynamicColorBlendState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_cb_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_COLOR_CALC_STATE color_calc_state = { + .BlendConstantColorRed = pCreateInfo->blendConst[0], + .BlendConstantColorGreen = pCreateInfo->blendConst[1], + .BlendConstantColorBlue = pCreateInfo->blendConst[2], + .BlendConstantColorAlpha = pCreateInfo->blendConst[3] + }; + + GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, 
&color_calc_state); + + *pState = anv_dynamic_cb_state_to_handle(state); + + return VK_SUCCESS; +} + +VkResult anv_DestroyDynamicColorBlendState( + VkDevice _device, + VkDynamicColorBlendState _cb_state) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state); + + anv_device_free(device, cb_state); + + return VK_SUCCESS; +} + +VkResult anv_CreateDynamicDepthStencilState( + VkDevice _device, + const VkDynamicDepthStencilStateCreateInfo* pCreateInfo, + VkDynamicDepthStencilState* pState) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_dynamic_ds_state *state; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO); + + state = anv_device_alloc(device, sizeof(*state), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (state == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + GEN8_3DSTATE_WM_DEPTH_STENCIL_header, + + /* Is this what we need to do? 
*/
+      .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
+
+      .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+      .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+
+      .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+      .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+   };
+
+   GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
+                                      &wm_depth_stencil);
+
+   struct GEN8_COLOR_CALC_STATE color_calc_state = {
+      .StencilReferenceValue = pCreateInfo->stencilFrontRef,
+      .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
+   };
+
+   GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
+
+   *pState = anv_dynamic_ds_state_to_handle(state);
+
+   return VK_SUCCESS;
+}
+
+/* Free the dynamic depth-stencil state object behind _ds_state. */
+VkResult anv_DestroyDynamicDepthStencilState(
+    VkDevice                                    _device,
+    VkDynamicDepthStencilState                  _ds_state)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state);
+
+   anv_device_free(device, ds_state);
+
+   return VK_SUCCESS;
+}
+
+/* Create a framebuffer object.
+ *
+ * The object is allocated with a trailing array of attachment-view pointers
+ * (one per pCreateInfo->attachmentCount).  A dynamic viewport state covering
+ * the whole framebuffer (width x height, depth range 0..1) is created along
+ * with it and stored in framebuffer->vp_state.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the framebuffer cannot be allocated,
+ * or the error reported by anv_CreateDynamicViewportState() if the viewport
+ * state cannot be created.
+ */
+VkResult anv_CreateFramebuffer(
+    VkDevice                                    _device,
+    const VkFramebufferCreateInfo*              pCreateInfo,
+    VkFramebuffer*                              pFramebuffer)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_framebuffer *framebuffer;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+   size_t size = sizeof(*framebuffer) +
+                 sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount;
+   framebuffer = anv_device_alloc(device, size, 8,
+                                  VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (framebuffer == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   framebuffer->attachment_count = pCreateInfo->attachmentCount;
+   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+      ANV_FROM_HANDLE(anv_attachment_view, view,
+                      pCreateInfo->pAttachments[i].view);
+
+      framebuffer->attachments[i] = view;
+   }
+
+   framebuffer->width = pCreateInfo->width;
+   framebuffer->height = pCreateInfo->height;
+   framebuffer->layers = pCreateInfo->layers;
+
+   result = anv_CreateDynamicViewportState(anv_device_to_handle(device),
+      &(VkDynamicViewportStateCreateInfo) {
+         .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO,
+         .viewportAndScissorCount = 1,
+         .pViewports = (VkViewport[]) {
+            {
+               .originX = 0,
+               .originY = 0,
+               .width = pCreateInfo->width,
+               .height = pCreateInfo->height,
+               .minDepth = 0,
+               .maxDepth = 1
+            },
+         },
+         .pScissors = (VkRect2D[]) {
+            { { 0, 0 },
+              { pCreateInfo->width, pCreateInfo->height } },
+         }
+      },
+      &framebuffer->vp_state);
+   if (result != VK_SUCCESS) {
+      /* Don't hand back a framebuffer whose vp_state was never created. */
+      anv_device_free(device, framebuffer);
+      return result;
+   }
+
+   *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a framebuffer together with the viewport state that
+ * anv_CreateFramebuffer() created for it.
+ */
+VkResult anv_DestroyFramebuffer(
+    VkDevice                                    _device,
+    VkFramebuffer                               _fb)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
+
+   anv_DestroyDynamicViewportState(anv_device_to_handle(device),
+                                   fb->vp_state);
+   anv_device_free(device, fb);
+
+   return VK_SUCCESS;
+}
+
+/* Release a render pass and every attachment array hanging off of it.
+ *
+ * Works on a partially constructed pass as long as the allocation was zeroed
+ * first: arrays that were never allocated are NULL and are skipped.
+ */
+static void
+anv_render_pass_free(struct anv_device *device, struct anv_render_pass *pass)
+{
+   if (pass->attachments != NULL)
+      anv_device_free(device, pass->attachments);
+
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      /* In VkSubpassCreateInfo, each of the attachment arrays may be null.
+       * Don't free the null arrays.
+       */
+      struct anv_subpass *subpass = &pass->subpasses[i];
+
+      if (subpass->input_attachments != NULL)
+         anv_device_free(device, subpass->input_attachments);
+      if (subpass->color_attachments != NULL)
+         anv_device_free(device, subpass->color_attachments);
+      if (subpass->resolve_attachments != NULL)
+         anv_device_free(device, subpass->resolve_attachments);
+   }
+
+   anv_device_free(device, pass);
+}
+
+/* Create a render pass.
+ *
+ * The pass is allocated with a trailing array of pCreateInfo->subpassCount
+ * anv_subpass entries.  Per-attachment format/samples/load ops are copied
+ * into a separately allocated array, and each subpass gets its own arrays of
+ * input, color and (optionally) resolve attachment indices.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY if any allocation fails; everything
+ * allocated up to that point is released again.
+ */
+VkResult anv_CreateRenderPass(
+    VkDevice                                    _device,
+    const VkRenderPassCreateInfo*               pCreateInfo,
+    VkRenderPass*                               pRenderPass)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_render_pass *pass;
+   size_t size;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
+
+   size = sizeof(*pass) +
+      pCreateInfo->subpassCount * sizeof(struct anv_subpass);
+   pass = anv_device_alloc(device, size, 8,
+                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (pass == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Clear the subpasses along with the parent pass. This is required because
+    * each array member of anv_subpass must be a valid pointer if not NULL.
+    */
+   memset(pass, 0, size);
+
+   pass->attachment_count = pCreateInfo->attachmentCount;
+   pass->subpass_count = pCreateInfo->subpassCount;
+
+   /* Only allocate when there is something to store, so that a NULL result
+    * always means "out of memory" and never "zero-sized request".
+    */
+   if (pCreateInfo->attachmentCount > 0) {
+      size = pCreateInfo->attachmentCount * sizeof(*pass->attachments);
+      pass->attachments = anv_device_alloc(device, size, 8,
+                                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+      if (pass->attachments == NULL)
+         goto fail;
+   }
+
+   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+      pass->attachments[i].format = pCreateInfo->pAttachments[i].format;
+      pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples;
+      pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp;
+      pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+      // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp;
+      // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+   }
+
+   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
+      struct anv_subpass *subpass = &pass->subpasses[i];
+
+      subpass->input_count = desc->inputCount;
+      subpass->color_count = desc->colorCount;
+
+      if (desc->inputCount > 0) {
+         subpass->input_attachments =
+            anv_device_alloc(device, desc->inputCount * sizeof(uint32_t),
+                             8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+         if (subpass->input_attachments == NULL)
+            goto fail;
+
+         for (uint32_t j = 0; j < desc->inputCount; j++) {
+            subpass->input_attachments[j]
+               = desc->inputAttachments[j].attachment;
+         }
+      }
+
+      if (desc->colorCount > 0) {
+         subpass->color_attachments =
+            anv_device_alloc(device, desc->colorCount * sizeof(uint32_t),
+                             8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+         if (subpass->color_attachments == NULL)
+            goto fail;
+
+         for (uint32_t j = 0; j < desc->colorCount; j++) {
+            subpass->color_attachments[j]
+               = desc->colorAttachments[j].attachment;
+         }
+      }
+
+      /* Resolve attachments are indexed in parallel with color attachments,
+       * so there is nothing to copy when the subpass has no color outputs.
+       */
+      if (desc->resolveAttachments && desc->colorCount > 0) {
+         subpass->resolve_attachments =
+            anv_device_alloc(device, desc->colorCount * sizeof(uint32_t),
+                             8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+         if (subpass->resolve_attachments == NULL)
+            goto fail;
+
+         for (uint32_t j = 0; j < desc->colorCount; j++) {
+            subpass->resolve_attachments[j]
+               = desc->resolveAttachments[j].attachment;
+         }
+      }
+
+      subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment;
+   }
+
+   *pRenderPass = anv_render_pass_to_handle(pass);
+
+   return VK_SUCCESS;
+
+ fail:
+   anv_render_pass_free(device, pass);
+
+   return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+/* Destroy a render pass and all of its per-subpass attachment arrays. */
+VkResult anv_DestroyRenderPass(
+    VkDevice                                    _device,
+    VkRenderPass                                _pass)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_render_pass, pass, _pass);
+
+   anv_render_pass_free(device, pass);
+
+   return VK_SUCCESS;
+}
+
+/* Report the render-area granularity; this implementation always answers
+ * 1x1.
+ */
+VkResult anv_GetRenderAreaGranularity(
+    VkDevice                                    device,
+    VkRenderPass                                renderPass,
+    VkExtent2D*                                 pGranularity)
+{
+   *pGranularity = (VkExtent2D) { 1, 1 };
+
+   return VK_SUCCESS;
+}
+
+/* Debug-marker entry points: exported with default visibility but
+ * implemented as no-ops.
+ */
+void vkCmdDbgMarkerBegin(
+    VkCmdBuffer                              cmdBuffer,
+    const char*                                 pMarker)
+   __attribute__ ((visibility ("default")));
+
+void vkCmdDbgMarkerEnd(
+   VkCmdBuffer                              cmdBuffer)
+   __attribute__ ((visibility ("default")));
+
+void vkCmdDbgMarkerBegin(
+    VkCmdBuffer                              cmdBuffer,
+    const char*                                 pMarker)
+{
+}
+
+void vkCmdDbgMarkerEnd(
+    VkCmdBuffer                              cmdBuffer)
+{
+}
diff --git a/src/vulkan/anv_entrypoints_gen.py b/src/vulkan/anv_entrypoints_gen.py
new file mode 100644
index 00000000000..96c4884d158
--- /dev/null
+++ b/src/vulkan/anv_entrypoints_gen.py
@@ -0,0 +1,269 @@
+# coding=utf-8
+#
+# Copyright © 2015 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+import fileinput, re, sys
+
+# Each function typedef in the vulkan.h header is all on one line and matches
+# this regexp.  We hope that won't change.
+
+p = re.compile('typedef ([^ ]*) *\(VKAPI \*PFN_vk([^(]*)\)(.*);')
+
+entrypoints = []
+
+# We generate a static hash table for entry point lookup
+# (vkGetProcAddress).  We use a linear congruential generator for our hash
+# function and a power-of-two size table.  The prime numbers are determined
+# experimentally.
+
+none = 0xffff
+hash_size = 256
+u32_mask = 2**32 - 1
+hash_mask = hash_size - 1
+
+prime_factor = 5024183
+prime_step = 19
+
+# 32-bit multiplicative string hash.  The C lookup function emitted at the
+# bottom of this script computes the same value with uint32_t wrap-around,
+# which is why the result is masked to 32 bits on every step here.
+def hash(name):
+    h = 0;
+    for c in name:
+        h = (h * prime_factor + ord(c)) & u32_mask
+
+    return h
+
+opt_header = False
+opt_code = False
+
+# The first argument selects the output ("header" or "code").  Remove exactly
+# that argument -- not whatever happens to be last -- so that any file names
+# following it are still seen by fileinput below.  With no argument at all we
+# fall through to generating the code from stdin instead of raising
+# IndexError.
+mode = sys.argv[1] if len(sys.argv) > 1 else None
+if (mode == "header"):
+    opt_header = True
+    sys.argv.pop(1)
+elif (mode == "code"):
+    opt_code = True
+    sys.argv.pop(1)
+
+# Parse the entry points in the header
+
+i = 0
+for line in fileinput.input():
+    m = p.match(line)
+    if (m):
+        if m.group(2) == 'VoidFunction':
+            continue
+        fullname = "vk" + m.group(2)
+        h = hash(fullname)
+        entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
+        i = i + 1
+
+# For outputting entrypoints.h we generate a anv_EntryPoint() prototype
+# per entry point.
+
+if opt_header:
+    for type, name, args, num, h in entrypoints:
+        print "%s anv_%s%s;" % (type, name, args)
+        print "%s anv_validate_%s%s;" % (type, name, args)
+    sys.exit()
+
+
+
+print """/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* DO NOT EDIT! This is a generated file. */
+
+#include "anv_private.h"
+
+struct anv_entrypoint {
+   uint32_t name;
+   uint32_t hash;
+   void *function;
+   void *validate;
+};
+
+/* We use a big string constant to avoid lots of reloctions from the entry
+ * point table to lots of little strings. The entries in the entry point table
+ * store the index into this big string.
+ */
+
+static const char strings[] ="""
+
+# offsets[n] is the byte offset of entry point n's name inside strings[]:
+# each entry occupies "vk" + name + the terminating NUL.
+offsets = []
+i = 0;
+for type, name, args, num, h in entrypoints:
+    print " \"vk%s\\0\"" % name
+    offsets.append(i)
+    i += 2 + len(name) + 1
+print """ ;
+
+/* Weak aliases for all potential validate functions. These will resolve to
+ * NULL if they're not defined, which lets the resolve_entrypoint() function
+ * either pick a validate wrapper if available or just plug in the actual
+ * entry point.
+ */
+"""
+
+for type, name, args, num, h in entrypoints:
+    print "%s anv_validate_%s%s __attribute__ ((weak));" % (type, name, args)
+
+# Now generate the table of all entry points and their validation functions
+
+print "\nstatic const struct anv_entrypoint entrypoints[] = {"
+for type, name, args, num, h in entrypoints:
+    print " { %5d, 0x%08x, anv_%s, anv_validate_%s }," % (offsets[num], h, name, name)
+print "};\n"
+
+print """
+#ifdef DEBUG
+static bool enable_validate = true;
+#else
+static bool enable_validate = false;
+#endif
+
+/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
+ * function. This means that we have to determine whether or not to use the
+ * validation layer sometime before that. The constructor function attribute asks
+ * the dynamic linker to invoke determine_validate() at dlopen() time which
+ * works.
+ */
+static void __attribute__ ((constructor))
+determine_validate(void)
+{
+   const char *s = getenv("ANV_VALIDATE");
+
+   if (s)
+      enable_validate = atoi(s);
+}
+
+static void * __attribute__ ((noinline))
+resolve_entrypoint(uint32_t index)
+{
+   if (enable_validate && entrypoints[index].validate)
+      return entrypoints[index].validate;
+
+   return entrypoints[index].function;
+}
+"""
+
+# Now output ifuncs and their resolve helpers for all entry points.  The
+# resolve helper calls resolve_entrypoint() with the entry point index, which
+# lets the resolver look it up in the table.
+
+for type, name, args, num, h in entrypoints:
+    print "static void *resolve_%s(void) { return resolve_entrypoint(%d); }" % (name, num)
+    print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
+
+
+# Now generate the hash table used for entry point look up.  This is a
+# uint16_t table of entry point indices. We use 0xffff to indicate an entry
+# in the hash table is empty.
+
+# The table must keep at least one empty slot: the probing loop below (and
+# the lookup loop in the generated C) only stops at an empty slot or a match,
+# so a full table would make them spin forever.
+assert len(entrypoints) < hash_size, \
+    "%d entry points do not fit a %d-slot hash table; raise hash_size" % \
+    (len(entrypoints), hash_size)
+
+map = [none for f in xrange(hash_size)]
+collisions = [0 for f in xrange(10)]
+for type, name, args, num, h in entrypoints:
+    level = 0
+    while map[h & hash_mask] != none:
+        h = h + prime_step
+        level = level + 1
+    if level > 9:
+        collisions[9] += 1
+    else:
+        collisions[level] += 1
+    map[h & hash_mask] = num
+
+print "/* Hash table stats:"
+print " * size %d entries" % hash_size
+print " * collisions entries"
+for i in xrange(10):
+    if (i == 9):
+        plus = "+"
+    else:
+        plus = " "
+
+    print " * %2d%s %4d" % (i, plus, collisions[i])
+print " */\n"
+
+print "#define none 0x%04x\n" % none
+
+print "static const uint16_t map[] = {"
+for i in xrange(0, hash_size, 8):
+    print " ",
+    for j in xrange(i, i + 8):
+        if map[j] & 0xffff == 0xffff:
+            print " none,",
+        else:
+            print "0x%04x," % (map[j] & 0xffff),
+    print
+
+print "};"
+
+# Finally we generate the hash table lookup function.  The hash function and
+# linear probing algorithm matches the hash table generated above.
+
+print """
+void *
+anv_lookup_entrypoint(const char *name)
+{
+   static const uint32_t prime_factor = %d;
+   static const uint32_t prime_step = %d;
+   const struct anv_entrypoint *e;
+   uint32_t hash, h, i;
+   const char *p;
+
+   hash = 0;
+   for (p = name; *p; p++)
+      hash = hash * prime_factor + *p;
+
+   h = hash;
+   do {
+      i = map[h & %d];
+      if (i == none)
+         return NULL;
+      e = &entrypoints[i];
+      h += prime_step;
+   } while (e->hash != hash);
+
+   if (strcmp(name, strings + e->name) != 0)
+      return NULL;
+
+   return resolve_entrypoint(i);
+}
+""" % (prime_factor, prime_step, hash_mask)
diff --git a/src/vulkan/anv_formats.c b/src/vulkan/anv_formats.c
new file mode 100644
index 00000000000..3cbcff5730f
--- /dev/null
+++ b/src/vulkan/anv_formats.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +#define UNSUPPORTED 0xffff + +#define fmt(__vk_fmt, ...) \ + [__vk_fmt] = { .name = #__vk_fmt, __VA_ARGS__ } + +static const struct anv_format anv_formats[] = { + fmt(VK_FORMAT_UNDEFINED, RAW, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R4G4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R4G4B4A4_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G6B5_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_R5G5B5A1_USCALED, UNSUPPORTED), + fmt(VK_FORMAT_R8_UNORM, R8_UNORM, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SNORM, R8_SNORM, .cpp = 1, .num_channels = 1,), + fmt(VK_FORMAT_R8_USCALED, R8_USCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SSCALED, R8_SSCALED, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_UINT, R8_UINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SINT, R8_SINT, .cpp = 1, .num_channels = 1), + fmt(VK_FORMAT_R8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_R8G8_UNORM, R8G8_UNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SNORM, R8G8_SNORM, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_USCALED, R8G8_USCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SSCALED, R8G8_SSCALED, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_UINT, R8G8_UINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SINT, R8G8_SINT, .cpp = 2, .num_channels = 2), + fmt(VK_FORMAT_R8G8_SRGB, UNSUPPORTED), /* L8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8_UNORM, R8G8B8X8_UNORM, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM, .cpp = 3, .num_channels = 3), + 
fmt(VK_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_UINT, R8G8B8_UINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SINT, R8G8B8_SINT, .cpp = 3, .num_channels = 3), + fmt(VK_FORMAT_R8G8B8_SRGB, UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */ + fmt(VK_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R10G10B10A2_SINT, R10G10B10A2_SINT, .cpp = 4, .num_channels = 4), + fmt(VK_FORMAT_R16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SNORM, R16_SNORM, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_USCALED, R16_USCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SSCALED, R16_SSCALED, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_UINT, R16_UINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SINT, R16_SINT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16_SFLOAT, R16_FLOAT, .cpp = 2, .num_channels = 1), + fmt(VK_FORMAT_R16G16_UNORM, R16G16_UNORM, .cpp = 4, .num_channels = 2), + 
fmt(VK_FORMAT_R16G16_SNORM, R16G16_SNORM, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_USCALED, R16G16_USCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SSCALED, R16G16_SSCALED, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_UINT, R16G16_UINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SINT, R16G16_SINT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16_SFLOAT, R16G16_FLOAT, .cpp = 4, .num_channels = 2), + fmt(VK_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_UINT, R16G16B16_UINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SINT, R16G16B16_SINT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16_SFLOAT, R16G16B16_FLOAT, .cpp = 6, .num_channels = 3), + fmt(VK_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R16G16B16A16_SFLOAT, R16G16B16A16_FLOAT, .cpp = 8, .num_channels = 4), + fmt(VK_FORMAT_R32_UINT, R32_UINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SINT, R32_SINT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1,), + fmt(VK_FORMAT_R32G32_UINT, R32G32_UINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SINT, R32G32_SINT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32_SFLOAT, 
R32G32_FLOAT, .cpp = 8, .num_channels = 2,), + fmt(VK_FORMAT_R32G32B32_UINT, R32G32B32_UINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SINT, R32G32B32_SINT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32_SFLOAT, R32G32B32_FLOAT, .cpp = 12, .num_channels = 3,), + fmt(VK_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R32G32B32A32_SFLOAT, R32G32B32A32_FLOAT, .cpp = 16, .num_channels = 4,), + fmt(VK_FORMAT_R64_SFLOAT, R64_FLOAT, .cpp = 8, .num_channels = 1), + fmt(VK_FORMAT_R64G64_SFLOAT, R64G64_FLOAT, .cpp = 16, .num_channels = 2), + fmt(VK_FORMAT_R64G64B64_SFLOAT, R64G64B64_FLOAT, .cpp = 24, .num_channels = 3), + fmt(VK_FORMAT_R64G64B64A64_SFLOAT, R64G64B64A64_FLOAT, .cpp = 32, .num_channels = 4), + fmt(VK_FORMAT_R11G11B10_UFLOAT, R11G11B10_FLOAT, .cpp = 4, .num_channels = 3), + fmt(VK_FORMAT_R9G9B9E5_UFLOAT, R9G9B9E5_SHAREDEXP, .cpp = 4, .num_channels = 3), + + fmt(VK_FORMAT_D16_UNORM, R16_UNORM, .cpp = 2, .num_channels = 1, .depth_format = D16_UNORM), + fmt(VK_FORMAT_D24_UNORM, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 1, .depth_format = D24_UNORM_X8_UINT), + fmt(VK_FORMAT_D32_SFLOAT, R32_FLOAT, .cpp = 4, .num_channels = 1, .depth_format = D32_FLOAT), + fmt(VK_FORMAT_S8_UINT, R8_UINT, .cpp = 1, .num_channels = 1, .has_stencil = true), + fmt(VK_FORMAT_D16_UNORM_S8_UINT, R16_UNORM, .cpp = 2, .num_channels = 2, .depth_format = D16_UNORM, .has_stencil = true), + fmt(VK_FORMAT_D24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS, .cpp = 4, .num_channels = 2, .depth_format = D24_UNORM_X8_UINT, .has_stencil = true), + fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, R32_FLOAT, .cpp = 4, .num_channels = 2, .depth_format = D32_FLOAT, .has_stencil = true), + + fmt(VK_FORMAT_BC1_RGB_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGB_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC1_RGBA_SRGB, UNSUPPORTED), + 
fmt(VK_FORMAT_BC2_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC2_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC3_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC3_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_BC4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC4_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC5_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_UFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC6H_SFLOAT, UNSUPPORTED), + fmt(VK_FORMAT_BC7_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_BC7_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_EAC_R11G11_SNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_4x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x4_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_5x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_6x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_8x8_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x5_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x6_SRGB, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_UNORM, UNSUPPORTED), + fmt(VK_FORMAT_ASTC_10x8_SRGB, UNSUPPORTED), + 
fmt(VK_FORMAT_ASTC_10x10_UNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_ASTC_10x10_SRGB, UNSUPPORTED),
+   fmt(VK_FORMAT_ASTC_12x10_UNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_ASTC_12x10_SRGB, UNSUPPORTED),
+   fmt(VK_FORMAT_ASTC_12x12_UNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_ASTC_12x12_SRGB, UNSUPPORTED),
+   fmt(VK_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM, .cpp = 2, .num_channels = 4),
+   fmt(VK_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM, .cpp = 2, .num_channels = 4),
+   fmt(VK_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM, .cpp = 2, .num_channels = 3),
+   fmt(VK_FORMAT_B5G6R5_USCALED, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_UNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_SNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_USCALED, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_SSCALED, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_UINT, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_SINT, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8_SRGB, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B8G8R8A8_SNORM, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_USCALED, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_SSCALED, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_UINT, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_SINT, UNSUPPORTED),
+   fmt(VK_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT, .cpp = 4, .num_channels = 4),
+   fmt(VK_FORMAT_B10G10R10A2_SINT, B10G10R10A2_SINT, .cpp = 4, .num_channels = 4)
+};
+
+#undef fmt
+
+/* Look up the driver's description of a VkFormat.
+ *
+ * Returns a pointer into the anv_formats table, or NULL when the value lies
+ * outside the table, so an invalid enum from the application can no longer
+ * produce an out-of-bounds pointer.
+ */
+const struct anv_format *
+anv_format_for_vk_format(VkFormat format)
+{
+   const size_t num_formats = sizeof(anv_formats) / sizeof(anv_formats[0]);
+
+   /* Going through size_t also rejects negative values should the enum be
+    * signed.
+    */
+   if ((size_t) format >= num_formats)
+      return NULL;
+
+   return &anv_formats[format];
+}
+
+/* True when the format has a depth component (a depth_format that is
+ * neither 0 nor UNSUPPORTED) or a stencil component.  Unknown formats are
+ * reported as neither.
+ */
+bool
+anv_is_vk_format_depth_or_stencil(VkFormat format)
+{
+   const struct anv_format *format_info =
+      anv_format_for_vk_format(format);
+
+   if (format_info == NULL)
+      return false;
+
+   if (format_info->depth_format != UNSUPPORTED &&
+       format_info->depth_format != 0)
+      return true;
+
+   return format_info->has_stencil;
+}
+
+// Format capabilities
+
+/* One row of the surface_formats[] capability table, which is defined in
+ * another file.  anv_GetPhysicalDeviceFormatProperties() compares the
+ * per-feature fields against gen * 10 (+5 on Haswell) and enables the
+ * feature when the field is less than or equal to that value.
+ */
+struct surface_format_info {
+   bool exists;
+   int sampling;
+   int filtering;
+   int shadow_compare;
+   int chroma_key;
+   int render_target;
+   int alpha_blend;
+   int input_vb;
+   int streamed_output_vb;
+   int color_processing;
+};
+
+extern const struct surface_format_info surface_formats[];
+
+/* Validation wrapper: log the queried format's name to stderr, then forward
+ * to the real entry point.
+ */
+VkResult anv_validate_GetPhysicalDeviceFormatProperties(
+    VkPhysicalDevice                            physicalDevice,
+    VkFormat                                    _format,
+    VkFormatProperties*                         pFormatProperties)
+{
+   const struct anv_format *format = anv_format_for_vk_format(_format);
+   /* Never hand a NULL pointer to %s: the lookup fails for out-of-range
+    * values, and table slots without an initializer carry no name.
+    */
+   const char *name = "(unknown)";
+   if (format != NULL && format->name != NULL)
+      name = format->name;
+
+   fprintf(stderr, "vkGetFormatProperties(%s)\n", name);
+   return anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties);
+}
+
+/* Report which features a format supports with linear and optimal tiling.
+ *
+ * Returns VK_ERROR_INVALID_VALUE for a format value outside the format
+ * table.  Formats that are known but unsupported (no surface format, or one
+ * the capability table marks as non-existent) succeed with both feature
+ * masks set to 0.
+ */
+VkResult anv_GetPhysicalDeviceFormatProperties(
+    VkPhysicalDevice                            physicalDevice,
+    VkFormat                                    _format,
+    VkFormatProperties*                         pFormatProperties)
+{
+   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+   const struct surface_format_info *info;
+   int gen;
+
+   const struct anv_format *format = anv_format_for_vk_format(_format);
+   if (format == NULL)
+      return vk_error(VK_ERROR_INVALID_VALUE);
+
+   /* Same scale as the capability table: gen * 10, Haswell counts as +5. */
+   gen = physical_device->info->gen * 10;
+   if (physical_device->info->is_haswell)
+      gen += 5;
+
+   if (format->surface_format == UNSUPPORTED)
+      goto unsupported;
+
+   info = &surface_formats[format->surface_format];
+   if (!info->exists)
+      goto unsupported;
+
+   uint32_t linear = 0, tiled = 0;
+   if (info->sampling <= gen) {
+      linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
+      tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
+   }
+   if (info->render_target <= gen) {
+      linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
+      tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
+   }
+   if (info->alpha_blend <= gen) {
+      linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+      tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+   }
+   if (info->input_vb <= gen) {
+      /* Vertex fetch is only advertised for linear tiling. */
+      linear |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+   }
+
+   pFormatProperties->linearTilingFeatures = linear;
+   pFormatProperties->optimalTilingFeatures = tiled;
+
+   return VK_SUCCESS;
+
+ unsupported:
+   pFormatProperties->linearTilingFeatures = 0;
+   pFormatProperties->optimalTilingFeatures = 0;
+
+   return VK_SUCCESS;
+}
+
+/* Not implemented yet: always reports VK_UNSUPPORTED. */
+VkResult anv_GetPhysicalDeviceImageFormatProperties(
+    VkPhysicalDevice                            physicalDevice,
+    VkFormat                                    format,
+    VkImageType                                 type,
+    VkImageTiling                               tiling,
+    VkImageUsageFlags                           usage,
+    VkImageFormatProperties*                    pImageFormatProperties)
+{
+   /* TODO: We should do something here. Chad? */
+   stub_return(VK_UNSUPPORTED);
+}
+
+/* Not implemented yet: always reports VK_UNSUPPORTED. */
+VkResult anv_GetPhysicalDeviceSparseImageFormatProperties(
+    VkPhysicalDevice                            physicalDevice,
+    VkFormat                                    format,
+    VkImageType                                 type,
+    uint32_t                                    samples,
+    VkImageUsageFlags                           usage,
+    VkImageTiling                               tiling,
+    uint32_t*                                   pNumProperties,
+    VkSparseImageFormatProperties*              pProperties)
+{
+   stub_return(VK_UNSUPPORTED);
+}
diff --git a/src/vulkan/anv_gem.c b/src/vulkan/anv_gem.c
new file mode 100644
index 00000000000..01671d2ea50
--- /dev/null
+++ b/src/vulkan/anv_gem.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define _DEFAULT_SOURCE
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
+
+/* ioctl() that transparently restarts the call for as long as it fails
+ * with EINTR or EAGAIN.  Any other outcome is returned to the caller as-is.
+ */
+static int
+anv_ioctl(int fd, unsigned long request, void *arg)
+{
+   for (;;) {
+      const int rc = ioctl(fd, request, arg);
+
+      if (rc != -1)
+         return rc;
+      if (errno != EINTR && errno != EAGAIN)
+         return rc;
+   }
+}
+
+/**
+ * Wrapper around DRM_IOCTL_I915_GEM_CREATE.
+ *
+ * Return gem handle, or 0 on failure. Gem handles are never 0.
+ */
+uint32_t
+anv_gem_create(struct anv_device *device, size_t size)
+{
+   struct drm_i915_gem_create args;
+
+   VG_CLEAR(args);
+   args.size = size;
+
+   /* FIXME: What do we do if this fails? */
+   if (anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &args) != 0)
+      return 0;
+
+   return args.handle;
+}
+
+/* Drop the given gem handle through DRM_IOCTL_GEM_CLOSE; the ioctl result
+ * is not reported.
+ */
+void
+anv_gem_close(struct anv_device *device, int gem_handle)
+{
+   struct drm_gem_close args;
+
+   VG_CLEAR(args);
+   args.handle = gem_handle;
+   anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &args);
+}
+
+/**
+ * Wrapper around DRM_IOCTL_I915_GEM_MMAP.
+ */
+void*
+anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
+             uint64_t offset, uint64_t size)
+{
+   struct drm_i915_gem_mmap args;
+
+   /* Inputs are filled in explicitly; the remaining fields are only cleared
+    * for valgrind's benefit.
+    */
+   args.handle = gem_handle;
+   VG_CLEAR(args.pad);
+   args.offset = offset;
+   args.size = size;
+   VG_CLEAR(args.addr_ptr);
+
+#ifdef I915_MMAP_WC
+   args.flags = 0;
+#endif
+
+   /* FIXME: Is NULL the right error return? Cf MAP_INVALID */
+   if (anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &args) != 0)
+      return NULL;
+
+   VG(VALGRIND_MALLOCLIKE_BLOCK(args.addr_ptr, args.size, 0, 1));
+
+   return (void *)(uintptr_t) args.addr_ptr;
+}
+
+/* Counterpart of anv_gem_mmap(): tells valgrind the mapping is gone and
+ * then unmaps it with munmap().
+ */
+void
+anv_gem_munmap(void *p, uint64_t size)
+{
+   VG(VALGRIND_FREELIKE_BLOCK(p, 0));
+   munmap(p, size);
+}
+
+/* Wrap a range of user memory in a gem object via
+ * DRM_IOCTL_I915_GEM_USERPTR.
+ *
+ * Returns the new gem handle, or 0 when the ioctl fails.
+ */
+int
+anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
+{
+   struct drm_i915_gem_userptr args;
+
+   VG_CLEAR(args);
+   args.user_ptr = (__u64)((unsigned long) mem);
+   args.user_size = size;
+   args.flags = 0;
+
+   if (anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &args) == -1)
+      return 0;
+
+   return args.handle;
+}
+
+/**
+ * On error, \a timeout_ns holds the remaining time.
+ */
+int
+anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns)
+{
+   struct drm_i915_gem_wait wait;
+   int ret;
+
+   VG_CLEAR(wait);
+   wait.bo_handle = gem_handle;
+   wait.timeout_ns = *timeout_ns;
+   wait.flags = 0;
+
+   ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
+   /* Copy back whatever timeout value the ioctl left in the struct. */
+   *timeout_ns = wait.timeout_ns;
+
+   return ret;
+}
+
+/* Submit a batch through DRM_IOCTL_I915_GEM_EXECBUFFER2 and return the
+ * ioctl result.
+ */
+int
+anv_gem_execbuffer(struct anv_device *device,
+                   struct drm_i915_gem_execbuffer2 *execbuf)
+{
+   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+}
+
+/* Set the tiling mode and stride of a gem object via
+ * DRM_IOCTL_I915_GEM_SET_TILING.
+ *
+ * \a tiling is the requested tiling mode (an I915_TILING_* value) and is
+ * passed through to the kernel unchanged.  Returns the ioctl result.
+ */
+int
+anv_gem_set_tiling(struct anv_device *device,
+                   int gem_handle, uint32_t stride, uint32_t tiling)
+{
+   struct drm_i915_gem_set_tiling set_tiling;
+   int ret;
+
+   /* set_tiling overwrites the input on the error path, so we have to open
+    * code anv_ioctl.
+    */
+
+   do {
+      VG_CLEAR(set_tiling);
+      set_tiling.handle = gem_handle;
+      /* Honor the caller's request instead of forcing I915_TILING_X. */
+      set_tiling.tiling_mode = tiling;
+      set_tiling.stride = stride;
+
+      ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
+   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+   return ret;
+}
+
+/* Query a driver parameter with DRM_IOCTL_I915_GETPARAM.
+ *
+ * Returns the parameter's value, or 0 when the ioctl fails, so a failed
+ * query cannot be told apart from a parameter whose value is 0.
+ */
+int
+anv_gem_get_param(int fd, uint32_t param)
+{
+   drm_i915_getparam_t gp;
+   int ret, tmp;
+
+   VG_CLEAR(gp);
+   gp.param = param;
+   gp.value = &tmp;
+   ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+   if (ret == 0)
+      return tmp;
+
+   return 0;
+}
+
+/* Create a hardware context.  Returns the context id, or -1 on failure. */
+int
+anv_gem_create_context(struct anv_device *device)
+{
+   struct drm_i915_gem_context_create create;
+   int ret;
+
+   VG_CLEAR(create);
+
+   ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
+   if (ret == -1)
+      return -1;
+
+   return create.ctx_id;
+}
+
+/* Destroy a hardware context and return the ioctl result. */
+int
+anv_gem_destroy_context(struct anv_device *device, int context)
+{
+   struct drm_i915_gem_context_destroy destroy;
+
+   VG_CLEAR(destroy);
+   destroy.ctx_id = context;
+
+   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
+}
+
+/* Store the available aperture size reported by the kernel in \a size.
+ *
+ * Returns 0 on success; returns -1 and leaves \a size untouched on failure.
+ */
+int
+anv_gem_get_aperture(int fd, uint64_t *size)
+{
+   struct drm_i915_gem_get_aperture aperture;
+   int ret;
+
+   VG_CLEAR(aperture);
+   ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
+   if (ret == -1)
+      return -1;
+
+   *size = aperture.aper_available_size;
+
+   return 0;
+}
+
+/* Export a gem handle as a prime file descriptor, requested with
+ * DRM_CLOEXEC.  Returns the fd, or -1 on failure.
+ */
+int
+anv_gem_handle_to_fd(struct anv_device *device, int gem_handle)
+{
+   struct drm_prime_handle args;
+   int ret;
+
+   VG_CLEAR(args);
+   args.handle = gem_handle;
+   args.flags = DRM_CLOEXEC;
+
+   ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
+   if (ret == -1)
+      return -1;
+
+   return args.fd;
+}
+
+/* Import a prime file descriptor.  Returns the gem handle, or 0 on
+ * failure.
+ */
+int
+anv_gem_fd_to_handle(struct anv_device *device, int fd)
+{
+   struct drm_prime_handle args;
+   int ret;
+
+   VG_CLEAR(args);
+   args.fd = fd;
+
+   ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
+   if (ret == -1)
+      return 0;
+
+   return args.handle;
+}
diff --git a/src/vulkan/anv_gem_stubs.c b/src/vulkan/anv_gem_stubs.c
new file mode 100644
index 00000000000..d036314c446
--- /dev/null
+++ b/src/vulkan/anv_gem_stubs.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define _DEFAULT_SOURCE + +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/syscall.h> + +#include "anv_private.h" + +static inline int +memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +uint32_t +anv_gem_create(struct anv_device *device, size_t size) +{ + int fd = memfd_create("fake bo", MFD_CLOEXEC); + if (fd == -1) + return 0; + + assert(fd != 0); + + if (ftruncate(fd, size) == -1) + return 0; + + return fd; +} + +void +anv_gem_close(struct anv_device *device, int gem_handle) +{ + close(gem_handle); +} + +void* +anv_gem_mmap(struct anv_device *device, uint32_t gem_handle, + uint64_t offset, uint64_t size) +{ + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + gem_handle, offset); +} + +/* This is just a wrapper around munmap, but it also notifies valgrind that + * this map is no longer valid. Pair this with anv_gem_mmap(). 
+ */ +void +anv_gem_munmap(void *p, uint64_t size) +{ + munmap(p, size); +} + +int +anv_gem_userptr(struct anv_device *device, void *mem, size_t size) +{ + return -1; +} + +int +anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns) +{ + return 0; +} + +int +anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf) +{ + return 0; +} + +int +anv_gem_set_tiling(struct anv_device *device, + int gem_handle, uint32_t stride, uint32_t tiling) +{ + return 0; +} + +int +anv_gem_get_param(int fd, uint32_t param) +{ + unreachable("Unused"); +} + +int +anv_gem_create_context(struct anv_device *device) +{ + unreachable("Unused"); +} + +int +anv_gem_destroy_context(struct anv_device *device, int context) +{ + unreachable("Unused"); +} + +int +anv_gem_get_aperture(int fd, uint64_t *size) +{ + unreachable("Unused"); +} + +int +anv_gem_handle_to_fd(struct anv_device *device, int gem_handle) +{ + unreachable("Unused"); +} + +int +anv_gem_fd_to_handle(struct anv_device *device, int fd) +{ + unreachable("Unused"); +} diff --git a/src/vulkan/anv_image.c b/src/vulkan/anv_image.c new file mode 100644 index 00000000000..21099cb7730 --- /dev/null +++ b/src/vulkan/anv_image.c @@ -0,0 +1,745 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +struct anv_image_view_info { + uint8_t surface_type; /**< RENDER_SURFACE_STATE.SurfaceType */ + bool is_array:1; /**< RENDER_SURFACE_STATE.SurfaceArray */ + bool is_cube:1; /**< RENDER_SURFACE_STATE.CubeFaceEnable* */ +}; + +static const uint8_t anv_halign[] = { + [4] = HALIGN4, + [8] = HALIGN8, + [16] = HALIGN16, +}; + +static const uint8_t anv_valign[] = { + [4] = VALIGN4, + [8] = VALIGN8, + [16] = VALIGN16, +}; + +static const uint8_t anv_surf_type_from_image_type[] = { + [VK_IMAGE_TYPE_1D] = SURFTYPE_1D, + [VK_IMAGE_TYPE_2D] = SURFTYPE_2D, + [VK_IMAGE_TYPE_3D] = SURFTYPE_3D, + +}; + +static const struct anv_image_view_info +anv_image_view_info_table[] = { + #define INFO(s, ...) 
{ .surface_type = s, __VA_ARGS__ } + [VK_IMAGE_VIEW_TYPE_1D] = INFO(SURFTYPE_1D), + [VK_IMAGE_VIEW_TYPE_2D] = INFO(SURFTYPE_2D), + [VK_IMAGE_VIEW_TYPE_3D] = INFO(SURFTYPE_3D), + [VK_IMAGE_VIEW_TYPE_CUBE] = INFO(SURFTYPE_CUBE, .is_cube = 1), + [VK_IMAGE_VIEW_TYPE_1D_ARRAY] = INFO(SURFTYPE_1D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_2D_ARRAY] = INFO(SURFTYPE_2D, .is_array = 1), + [VK_IMAGE_VIEW_TYPE_CUBE_ARRAY] = INFO(SURFTYPE_CUBE, .is_array = 1, .is_cube = 1), + #undef INFO +}; + +static const struct anv_surf_type_limits { + int32_t width; + int32_t height; + int32_t depth; +} anv_surf_type_limits[] = { + [SURFTYPE_1D] = {16384, 0, 2048}, + [SURFTYPE_2D] = {16384, 16384, 2048}, + [SURFTYPE_3D] = {2048, 2048, 2048}, + [SURFTYPE_CUBE] = {16384, 16384, 340}, + [SURFTYPE_BUFFER] = {128, 16384, 64}, + [SURFTYPE_STRBUF] = {128, 16384, 64}, +}; + +static const struct anv_tile_info { + uint32_t width; + uint32_t height; + + /** + * Alignment for RENDER_SURFACE_STATE.SurfaceBaseAddress. + * + * To simplify calculations, the alignments defined in the table are + * sometimes larger than required. For example, Skylake requires that X and + * Y tiled buffers be aligned to 4K, but Broadwell permits smaller + * alignment. We choose 4K to accomodate both chipsets. The alignment of + * a linear buffer depends on its element type and usage. Linear depth + * buffers have the largest alignment, 64B, so we choose that for all linear + * buffers. 
+ */ + uint32_t surface_alignment; +} anv_tile_info_table[] = { + [LINEAR] = { 1, 1, 64 }, + [XMAJOR] = { 512, 8, 4096 }, + [YMAJOR] = { 128, 32, 4096 }, + [WMAJOR] = { 128, 32, 4096 }, +}; + +static uint32_t +anv_image_choose_tile_mode(const struct anv_image_create_info *anv_info) +{ + if (anv_info->force_tile_mode) + return anv_info->tile_mode; + + if (anv_info->vk_info->format == VK_FORMAT_S8_UINT) + return WMAJOR; + + switch (anv_info->vk_info->tiling) { + case VK_IMAGE_TILING_LINEAR: + return LINEAR; + case VK_IMAGE_TILING_OPTIMAL: + return YMAJOR; + default: + assert(!"bad VKImageTiling"); + return LINEAR; + } +} + +static VkResult +anv_image_make_surface(const struct anv_image_create_info *create_info, + uint64_t *inout_image_size, + uint32_t *inout_image_alignment, + struct anv_surface *out_surface) +{ + /* See RENDER_SURFACE_STATE.SurfaceQPitch */ + static const uint16_t min_qpitch UNUSED = 0x4; + static const uint16_t max_qpitch UNUSED = 0x1ffc; + + const VkExtent3D *restrict extent = &create_info->vk_info->extent; + const uint32_t levels = create_info->vk_info->mipLevels; + const uint32_t array_size = create_info->vk_info->arraySize; + + const uint8_t tile_mode = anv_image_choose_tile_mode(create_info); + + const struct anv_tile_info *tile_info = + &anv_tile_info_table[tile_mode]; + + const struct anv_format *format_info = + anv_format_for_vk_format(create_info->vk_info->format); + + const uint32_t i = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t j = 4; /* FINISHME: Stop hardcoding subimage alignment */ + const uint32_t w0 = align_u32(extent->width, i); + const uint32_t h0 = align_u32(extent->height, j); + + uint16_t qpitch; + uint32_t mt_width; + uint32_t mt_height; + + if (levels == 1 && array_size == 1) { + qpitch = min_qpitch; + mt_width = w0; + mt_height = h0; + } else { + uint32_t w1 = align_u32(anv_minify(extent->width, 1), i); + uint32_t h1 = align_u32(anv_minify(extent->height, 1), j); + uint32_t w2 = 
align_u32(anv_minify(extent->width, 2), i); + + qpitch = h0 + h1 + 11 * j; + mt_width = MAX(w0, w1 + w2); + mt_height = array_size * qpitch; + } + + assert(qpitch >= min_qpitch); + if (qpitch > max_qpitch) { + anv_loge("image qpitch > 0x%x\n", max_qpitch); + return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + /* From the Broadwell PRM, RENDER_SURFACE_STATE.SurfaceQpitch: + * + * This field must be set an integer multiple of the Surface Vertical + * Alignment. + */ + assert(anv_is_aligned(qpitch, j)); + + const uint32_t stride = align_u32(mt_width * format_info->cpp, + tile_info->width); + const uint32_t size = stride * align_u32(mt_height, tile_info->height); + const uint32_t offset = align_u32(*inout_image_size, + tile_info->surface_alignment); + + *inout_image_size = offset + size; + *inout_image_alignment = MAX(*inout_image_alignment, + tile_info->surface_alignment); + + *out_surface = (struct anv_surface) { + .offset = offset, + .stride = stride, + .tile_mode = tile_mode, + .qpitch = qpitch, + .h_align = i, + .v_align = j, + }; + + return VK_SUCCESS; +} + +VkResult +anv_image_create(VkDevice _device, + const struct anv_image_create_info *create_info, + VkImage *pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + const VkImageCreateInfo *pCreateInfo = create_info->vk_info; + const VkExtent3D *restrict extent = &pCreateInfo->extent; + struct anv_image *image = NULL; + VkResult r; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->imageType == VK_IMAGE_TYPE_2D); + anv_assert(pCreateInfo->mipLevels > 0); + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->samples == 1); + anv_assert(pCreateInfo->extent.width > 0); + anv_assert(pCreateInfo->extent.height > 0); + anv_assert(pCreateInfo->extent.depth > 0); + + /* TODO(chadv): How should we validate inputs? 
*/ + const uint8_t surf_type = + anv_surf_type_from_image_type[pCreateInfo->imageType]; + + const struct anv_surf_type_limits *limits = + &anv_surf_type_limits[surf_type]; + + if (extent->width > limits->width || + extent->height > limits->height || + extent->depth > limits->depth) { + /* TODO(chadv): What is the correct error? */ + anv_loge("image extent is too large"); + return vk_error(VK_ERROR_INVALID_MEMORY_SIZE); + } + + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (!image) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(image, 0, sizeof(*image)); + image->type = pCreateInfo->imageType; + image->extent = pCreateInfo->extent; + image->format = pCreateInfo->format; + image->levels = pCreateInfo->mipLevels; + image->array_size = pCreateInfo->arraySize; + image->surf_type = surf_type; + + if (likely(!format_info->has_stencil || format_info->depth_format)) { + /* The image's primary surface is a color or depth surface. */ + r = anv_image_make_surface(create_info, &image->size, &image->alignment, + &image->primary_surface); + if (r != VK_SUCCESS) + goto fail; + } + + if (format_info->has_stencil) { + /* From the GPU's perspective, the depth buffer and stencil buffer are + * separate buffers. From Vulkan's perspective, though, depth and + * stencil reside in the same image. To satisfy Vulkan and the GPU, we + * place the depth and stencil buffers in the same bo. 
+ */ + VkImageCreateInfo stencil_info = *pCreateInfo; + stencil_info.format = VK_FORMAT_S8_UINT; + + r = anv_image_make_surface( + &(struct anv_image_create_info) { + .vk_info = &stencil_info, + }, + &image->size, &image->alignment, &image->stencil_surface); + + if (r != VK_SUCCESS) + goto fail; + } + + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + +fail: + if (image) + anv_device_free(device, image); + + return r; +} + +VkResult +anv_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + VkImage *pImage) +{ + return anv_image_create(device, + &(struct anv_image_create_info) { + .vk_info = pCreateInfo, + }, + pImage); +} + +VkResult +anv_DestroyImage(VkDevice _device, VkImage _image) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + anv_device_free(device, anv_image_from_handle(_image)); + + return VK_SUCCESS; +} + +VkResult anv_GetImageSubresourceLayout( + VkDevice device, + VkImage image, + const VkImageSubresource* pSubresource, + VkSubresourceLayout* pLayout) +{ + stub_return(VK_UNSUPPORTED); +} + +void +anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view) +{ + anv_state_pool_free(&device->surface_state_pool, view->surface_state); +} + +void +anv_image_view_init(struct anv_image_view *iview, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + + const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + struct anv_surface_view *view = &iview->view; + struct anv_surface *surface; + + const struct anv_format *format_info = + anv_format_for_vk_format(pCreateInfo->format); + + const struct anv_image_view_info *view_type_info + = &anv_image_view_info_table[pCreateInfo->viewType]; + + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D) + anv_finishme("non-2D image views"); + + switch (pCreateInfo->subresourceRange.aspect) { + case VK_IMAGE_ASPECT_STENCIL: + 
anv_finishme("stencil image views"); + abort(); + break; + case VK_IMAGE_ASPECT_DEPTH: + case VK_IMAGE_ASPECT_COLOR: + view->offset = image->offset; + surface = &image->primary_surface; + break; + default: + unreachable(""); + break; + } + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = pCreateInfo->format; + + iview->extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, range->baseMipLevel), + .height = anv_minify(image->extent.height, range->baseMipLevel), + .depth = anv_minify(image->extent.depth, range->baseMipLevel), + }; + + uint32_t depth = 1; + if (range->arraySize > 1) { + depth = range->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + static const uint32_t vk_to_gen_swizzle[] = { + [VK_CHANNEL_SWIZZLE_ZERO] = SCS_ZERO, + [VK_CHANNEL_SWIZZLE_ONE] = SCS_ONE, + [VK_CHANNEL_SWIZZLE_R] = SCS_RED, + [VK_CHANNEL_SWIZZLE_G] = SCS_GREEN, + [VK_CHANNEL_SWIZZLE_B] = SCS_BLUE, + [VK_CHANNEL_SWIZZLE_A] = SCS_ALPHA + }; + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = view_type_info->surface_type, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = range->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For sampler surfaces, the hardware interprets field MIPCount/LOD as + * MIPCount. The range of levels accessible by the sampler engine is + * [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. + */ + .MIPCountLOD = range->mipLevels - 1, + .SurfaceMinLOD = range->baseMipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = vk_to_gen_swizzle[pCreateInfo->channels.r], + .ShaderChannelSelectGreen = vk_to_gen_swizzle[pCreateInfo->channels.g], + .ShaderChannelSelectBlue = vk_to_gen_swizzle[pCreateInfo->channels.b], + .ShaderChannelSelectAlpha = vk_to_gen_swizzle[pCreateInfo->channels.a], + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +VkResult +anv_validate_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + const VkImageSubresourceRange *subresource; + const struct anv_image_view_info *view_info; + const struct anv_format *view_format_info; + const struct anv_format *image_format_info; + + /* Validate structure type before dereferencing it. 
*/ + assert(pCreateInfo); + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO); + subresource = &pCreateInfo->subresourceRange; + + /* Validate viewType is in range before using it. */ + assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE); + assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE); + view_info = &anv_image_view_info_table[pCreateInfo->viewType]; + + /* Validate format is in range before using it. */ + assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE); + assert(pCreateInfo->format <= VK_FORMAT_END_RANGE); + image_format_info = anv_format_for_vk_format(image->format); + view_format_info = anv_format_for_vk_format(pCreateInfo->format); + + /* Validate channel swizzles. */ + assert(pCreateInfo->channels.r >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.r <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.g >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.g <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.b >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.b <= VK_CHANNEL_SWIZZLE_END_RANGE); + assert(pCreateInfo->channels.a >= VK_CHANNEL_SWIZZLE_BEGIN_RANGE); + assert(pCreateInfo->channels.a <= VK_CHANNEL_SWIZZLE_END_RANGE); + + /* Validate subresource. */ + assert(subresource->aspect >= VK_IMAGE_ASPECT_BEGIN_RANGE); + assert(subresource->aspect <= VK_IMAGE_ASPECT_END_RANGE); + assert(subresource->mipLevels > 0); + assert(subresource->arraySize > 0); + assert(subresource->baseMipLevel < image->levels); + assert(subresource->baseMipLevel + subresource->mipLevels <= image->levels); + assert(subresource->baseArraySlice < image->array_size); + assert(subresource->baseArraySlice + subresource->arraySize <= image->array_size); + assert(pView); + + if (view_info->is_cube) { + assert(subresource->baseArraySlice % 6 == 0); + assert(subresource->arraySize % 6 == 0); + } + + /* Validate format. 
*/ + switch (subresource->aspect) { + case VK_IMAGE_ASPECT_COLOR: + assert(!image_format_info->depth_format); + assert(!image_format_info->has_stencil); + assert(!view_format_info->depth_format); + assert(!view_format_info->has_stencil); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_DEPTH: + assert(image_format_info->depth_format); + assert(view_format_info->depth_format); + assert(view_format_info->cpp == image_format_info->cpp); + break; + case VK_IMAGE_ASPECT_STENCIL: + /* FINISHME: Is it legal to have an R8 view of S8? */ + assert(image_format_info->has_stencil); + assert(view_format_info->has_stencil); + break; + default: + assert(!"bad VkImageAspect"); + break; + } + + return anv_CreateImageView(_device, pCreateInfo, pView); +} + +VkResult +anv_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + VkImageView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_image_view *view; + + view = anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_image_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_image_view_to_handle(view); + + return VK_SUCCESS; +} + +VkResult +anv_DestroyImageView(VkDevice _device, VkImageView _iview) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image_view, iview, _iview); + + anv_surface_view_fini(device, &iview->view); + anv_device_free(device, iview); + + return VK_SUCCESS; +} + +void +anv_color_attachment_view_init(struct anv_color_attachment_view *aview, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface_view *view = &aview->view; + struct anv_surface *surface = &image->primary_surface; + const struct anv_format *format_info = + 
anv_format_for_vk_format(pCreateInfo->format); + + aview->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_COLOR; + + anv_assert(pCreateInfo->arraySize > 0); + anv_assert(pCreateInfo->mipLevel < image->levels); + anv_assert(pCreateInfo->baseArraySlice + pCreateInfo->arraySize <= image->array_size); + + view->bo = image->bo; + view->offset = image->offset + surface->offset; + view->format = pCreateInfo->format; + + aview->base.extent = (VkExtent3D) { + .width = anv_minify(image->extent.width, pCreateInfo->mipLevel), + .height = anv_minify(image->extent.height, pCreateInfo->mipLevel), + .depth = anv_minify(image->extent.depth, pCreateInfo->mipLevel), + }; + + uint32_t depth = 1; + if (pCreateInfo->arraySize > 1) { + depth = pCreateInfo->arraySize; + } else if (image->extent.depth > 1) { + depth = image->extent.depth; + } + + if (cmd_buffer) { + view->surface_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64); + } else { + view->surface_state = + anv_state_pool_alloc(&device->surface_state_pool, 64, 64); + } + + struct GEN8_RENDER_SURFACE_STATE surface_state = { + .SurfaceType = SURFTYPE_2D, + .SurfaceArray = image->array_size > 1, + .SurfaceFormat = format_info->surface_format, + .SurfaceVerticalAlignment = anv_valign[surface->v_align], + .SurfaceHorizontalAlignment = anv_halign[surface->h_align], + .TileMode = surface->tile_mode, + .VerticalLineStride = 0, + .VerticalLineStrideOffset = 0, + .SamplerL2BypassModeDisable = true, + .RenderCacheReadWriteMode = WriteOnlyCache, + .MemoryObjectControlState = GEN8_MOCS, + + /* The driver sets BaseMipLevel in SAMPLER_STATE, not here in + * RENDER_SURFACE_STATE. The Broadwell PRM says "it is illegal to have + * both Base Mip Level fields nonzero". 
+ */ + .BaseMipLevel = 0.0, + + .SurfaceQPitch = surface->qpitch >> 2, + .Height = image->extent.height - 1, + .Width = image->extent.width - 1, + .Depth = depth - 1, + .SurfacePitch = surface->stride - 1, + .MinimumArrayElement = pCreateInfo->baseArraySlice, + .NumberofMultisamples = MULTISAMPLECOUNT_1, + .XOffset = 0, + .YOffset = 0, + + /* For render target surfaces, the hardware interprets field MIPCount/LOD as + * LOD. The Broadwell PRM says: + * + * MIPCountLOD defines the LOD that will be rendered into. + * SurfaceMinLOD is ignored. + */ + .SurfaceMinLOD = 0, + .MIPCountLOD = pCreateInfo->mipLevel, + + .AuxiliarySurfaceMode = AUX_NONE, + .RedClearColor = 0, + .GreenClearColor = 0, + .BlueClearColor = 0, + .AlphaClearColor = 0, + .ShaderChannelSelectRed = SCS_RED, + .ShaderChannelSelectGreen = SCS_GREEN, + .ShaderChannelSelectBlue = SCS_BLUE, + .ShaderChannelSelectAlpha = SCS_ALPHA, + .ResourceMinLOD = 0.0, + .SurfaceBaseAddress = { NULL, view->offset }, + }; + + GEN8_RENDER_SURFACE_STATE_pack(NULL, view->surface_state.map, &surface_state); +} + +static void +anv_depth_stencil_view_init(struct anv_depth_stencil_view *view, + const VkAttachmentViewCreateInfo *pCreateInfo) +{ + ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image); + struct anv_surface *depth_surface = &image->primary_surface; + struct anv_surface *stencil_surface = &image->stencil_surface; + const struct anv_format *format = + anv_format_for_vk_format(image->format); + + view->base.attachment_type = ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL; + + /* XXX: We don't handle any of these */ + anv_assert(pCreateInfo->mipLevel == 0); + anv_assert(pCreateInfo->baseArraySlice == 0); + anv_assert(pCreateInfo->arraySize == 1); + + view->bo = image->bo; + + view->depth_stride = depth_surface->stride; + view->depth_offset = image->offset + depth_surface->offset; + view->depth_format = format->depth_format; + view->depth_qpitch = 0; /* FINISHME: QPitch */ + + view->stencil_stride = stencil_surface->stride; + 
view->stencil_offset = image->offset + stencil_surface->offset; + view->stencil_qpitch = 0; /* FINISHME: QPitch */ +} + +VkResult +anv_CreateAttachmentView(VkDevice _device, + const VkAttachmentViewCreateInfo *pCreateInfo, + VkAttachmentView *pView) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO); + + if (anv_is_vk_format_depth_or_stencil(pCreateInfo->format)) { + struct anv_depth_stencil_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_depth_stencil_view_init(view, pCreateInfo); + + *pView = anv_attachment_view_to_handle(&view->base); + } else { + struct anv_color_attachment_view *view = + anv_device_alloc(device, sizeof(*view), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (view == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + anv_color_attachment_view_init(view, device, pCreateInfo, NULL); + + *pView = anv_attachment_view_to_handle(&view->base); + } + + return VK_SUCCESS; +} + +VkResult +anv_DestroyAttachmentView(VkDevice _device, VkAttachmentView _view) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_attachment_view, view, _view); + + if (view->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR) { + struct anv_color_attachment_view *aview = + (struct anv_color_attachment_view *)view; + + anv_surface_view_fini(device, &aview->view); + } + + anv_device_free(device, view); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_intel.c b/src/vulkan/anv_intel.c new file mode 100644 index 00000000000..9fc06aef6f8 --- /dev/null +++ b/src/vulkan/anv_intel.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +VkResult anv_CreateDmaBufImageINTEL( + VkDevice _device, + const VkDmaBufImageCreateInfo* pCreateInfo, + VkDeviceMemory* pMem, + VkImage* pImage) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_device_memory *mem; + struct anv_image *image; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL); + + mem = anv_device_alloc(device, sizeof(*mem), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (mem == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd); + if (!mem->bo.gem_handle) { + result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + + mem->bo.map = NULL; + mem->bo.index = 0; + mem->bo.offset = 0; + mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height; + + image = anv_device_alloc(device, sizeof(*image), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (image == NULL) { + 
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_mem; + } + + *image = (struct anv_image) { + .bo = &mem->bo, + .offset = 0, + .type = VK_IMAGE_TYPE_2D, + .extent = pCreateInfo->extent, + .size = mem->bo.size, + + .primary_surface = { + .offset = 0, + .stride = pCreateInfo->strideInBytes, + .tile_mode = XMAJOR, + }, + }; + + assert(image->extent.width > 0); + assert(image->extent.height > 0); + assert(image->extent.depth == 1); + + *pMem = anv_device_memory_to_handle(mem); + *pImage = anv_image_to_handle(image); + + return VK_SUCCESS; + + fail_mem: + anv_gem_close(device, mem->bo.gem_handle); + fail: + anv_device_free(device, mem); + + return result; +} diff --git a/src/vulkan/anv_meta.c b/src/vulkan/anv_meta.c new file mode 100644 index 00000000000..8ee7eb012e6 --- /dev/null +++ b/src/vulkan/anv_meta.c @@ -0,0 +1,1452 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" +#include "anv_meta_spirv.h" + +static void +anv_device_init_meta_clear_state(struct anv_device *device) +{ + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. + */ + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + out vec4 f_color; + flat in vec4 v_color; + void main() + { + f_color = v_color; + } + ); + + VkShader fs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm, + .pName = "main", + }, &fs); + + /* We use instanced rendering to clear multiple render targets. We have two + * vertex buffers: the first vertex buffer holds per-vertex data and + * provides the vertices for the clear rectangle. The second one holds + * per-instance data, which consists of the VUE header (which selects the + * layer) and the color (Vulkan supports per-RT clear colors). 
+ */ + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 8, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 32, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_INSTANCE + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + .location = 0, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Color */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offsetInBytes = 16 + } + } + }; + + anv_pipeline_create(anv_device_to_handle(device), + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 1, + .pStages = &(VkPipelineShaderStageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL, + }, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = 
(VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .flags = 0, + }, + &(struct anv_pipeline_create_info) { + .use_repclear = true, + .disable_viewport = true, + .use_rectlist = true + }, + &device->meta_state.clear.pipeline); + + anv_DestroyShaderModule(anv_device_to_handle(device), fsm); + anv_DestroyShader(anv_device_to_handle(device), fs); +} + +#define NUM_VB_USED 2 +struct anv_saved_state { + struct anv_vertex_binding old_vertex_bindings[NUM_VB_USED]; + struct anv_descriptor_set *old_descriptor_set0; + struct anv_pipeline *old_pipeline; + VkDynamicColorBlendState cb_state; +}; + +static void +anv_cmd_buffer_save(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *state) +{ + state->old_pipeline = cmd_buffer->state.pipeline; + state->old_descriptor_set0 = cmd_buffer->state.descriptors[0].set; + memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings, + sizeof(state->old_vertex_bindings)); +} + +static void +anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *state) +{ + cmd_buffer->state.pipeline = state->old_pipeline; + cmd_buffer->state.descriptors[0].set = state->old_descriptor_set0; + memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings, + sizeof(state->old_vertex_bindings)); + + cmd_buffer->state.vb_dirty |= (1 << NUM_VB_USED) - 1; + cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_VERTEX_BIT; +} + +struct vue_header { + uint32_t Reserved; + uint32_t RTAIndex; + uint32_t ViewportIndex; + float PointWidth; +}; + +struct clear_instance_data { + struct vue_header vue_header; + VkClearColorValue color; +}; + +static void +meta_emit_clear(struct anv_cmd_buffer *cmd_buffer, + int num_instances, + struct clear_instance_data *instance_data) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_framebuffer *fb 
= cmd_buffer->state.framebuffer; + struct anv_state state; + uint32_t size; + + const float vertex_data[] = { + /* Rect-list coordinates */ + 0.0, 0.0, + fb->width, 0.0, + fb->width, fb->height, + + /* Align to 16 bytes */ + 0.0, 0.0, + }; + + size = sizeof(vertex_data) + num_instances * sizeof(*instance_data); + state = anv_state_stream_alloc(&cmd_buffer->surface_state_stream, size, 16); + + /* Copy in the vertex and instance data */ + memcpy(state.map, vertex_data, sizeof(vertex_data)); + memcpy(state.map + sizeof(vertex_data), instance_data, + num_instances * sizeof(*instance_data)); + + struct anv_buffer vertex_buffer = { + .device = cmd_buffer->device, + .size = size, + .bo = &device->surface_state_block_pool.bo, + .offset = state.offset + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(vertex_data) + }); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.clear.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.clear.pipeline); + + /* We don't need anything here, only set if not already set. 
*/ + if (cmd_buffer->state.rs_state == NULL) + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.rs_state); + + if (cmd_buffer->state.vp_state == NULL) + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + cmd_buffer->state.framebuffer->vp_state); + + if (cmd_buffer->state.ds_state == NULL) + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); + + if (cmd_buffer->state.cb_state == NULL) + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); + + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, num_instances); +} + +void +anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values) +{ + struct anv_saved_state saved_state; + + int num_clear_layers = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + if (anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + anv_finishme("Can't clear depth-stencil yet"); + continue; + } + num_clear_layers++; + } + } + + if (num_clear_layers == 0) + return; + + struct clear_instance_data instance_data[num_clear_layers]; + uint32_t color_attachments[num_clear_layers]; + + int layer = 0; + for (uint32_t i = 0; i < pass->attachment_count; i++) { + if (pass->attachments[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR && + !anv_is_vk_format_depth_or_stencil(pass->attachments[i].format)) { + instance_data[layer] = (struct clear_instance_data) { + .vue_header = { + .RTAIndex = i, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = clear_values[i].color, + }; + color_attachments[layer] = i; + layer++; + } + } + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + struct anv_subpass subpass = { + .input_count = 0, + .color_count = num_clear_layers, + .color_attachments = 
color_attachments, + .depth_stencil_attachment = VK_ATTACHMENT_UNUSED, + }; + + anv_cmd_buffer_begin_subpass(cmd_buffer, &subpass); + + meta_emit_clear(cmd_buffer, num_clear_layers, instance_data); + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + +static void +anv_device_init_meta_blit_state(struct anv_device *device) +{ + /* We don't use a vertex shader for clearing, but instead build and pass + * the VUEs directly to the rasterization backend. However, we do need + * to provide GLSL source for the vertex shader so that the compiler + * does not dead-code our inputs. + */ + VkShaderModule vsm = GLSL_VK_SHADER_MODULE(device, VERTEX, + in vec2 a_pos; + in vec2 a_tex_coord; + out vec4 v_tex_coord; + void main() + { + v_tex_coord = vec4(a_tex_coord, 0, 1); + gl_Position = vec4(a_pos, 0, 1); + } + ); + + VkShaderModule fsm = GLSL_VK_SHADER_MODULE(device, FRAGMENT, + out vec4 f_color; + in vec4 v_tex_coord; + layout(set = 0, binding = 0) uniform sampler2D u_tex; + void main() + { + f_color = texture(u_tex, v_tex_coord.xy); + } + ); + + VkShader vs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = vsm, + .pName = "main", + }, &vs); + + VkShader fs; + anv_CreateShader(anv_device_to_handle(device), + &(VkShaderCreateInfo) { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO, + .module = fsm, + .pName = "main", + }, &fs); + + VkPipelineVertexInputStateCreateInfo vi_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .bindingCount = 2, + .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) { + { + .binding = 0, + .strideInBytes = 0, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + { + .binding = 1, + .strideInBytes = 16, + .stepRate = VK_VERTEX_INPUT_STEP_RATE_VERTEX + }, + }, + .attributeCount = 3, + .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) { + { + /* VUE Header */ + 
.location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_UINT, + .offsetInBytes = 0 + }, + { + /* Position */ + .location = 1, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 0 + }, + { + /* Texture Coordinate */ + .location = 2, + .binding = 1, + .format = VK_FORMAT_R32G32_SFLOAT, + .offsetInBytes = 8 + } + } + }; + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .count = 1, + .pBinding = (VkDescriptorSetLayoutBinding[]) { + { + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .arraySize = 1, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .pImmutableSamplers = NULL + }, + } + }; + anv_CreateDescriptorSetLayout(anv_device_to_handle(device), &ds_layout_info, + &device->meta_state.blit.ds_layout); + + anv_CreatePipelineLayout(anv_device_to_handle(device), + &(VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .descriptorSetCount = 1, + .pSetLayouts = &device->meta_state.blit.ds_layout, + }, + &device->meta_state.blit.pipeline_layout); + + anv_pipeline_create(anv_device_to_handle(device), + &(VkGraphicsPipelineCreateInfo) { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX, + .shader = vs, + .pSpecializationInfo = NULL + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT, + .shader = fs, + .pSpecializationInfo = NULL + }, + }, + .pVertexInputState = &vi_create_info, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, + .primitiveRestartEnable = false, + }, + .pRasterState = &(VkPipelineRasterStateCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_PIPELINE_RASTER_STATE_CREATE_INFO, + .depthClipEnable = true, + .rasterizerDiscardEnable = false, + .fillMode = VK_FILL_MODE_SOLID, + .cullMode = VK_CULL_MODE_NONE, + .frontFace = VK_FRONT_FACE_CCW + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { .channelWriteMask = VK_CHANNEL_A_BIT | + VK_CHANNEL_R_BIT | VK_CHANNEL_G_BIT | VK_CHANNEL_B_BIT }, + } + }, + .flags = 0, + .layout = device->meta_state.blit.pipeline_layout, + }, + &(struct anv_pipeline_create_info) { + .use_repclear = false, + .disable_viewport = true, + .disable_scissor = true, + .disable_vs = true, + .use_rectlist = true + }, + &device->meta_state.blit.pipeline); + + anv_DestroyShaderModule(anv_device_to_handle(device), vsm); + anv_DestroyShader(anv_device_to_handle(device), vs); + anv_DestroyShaderModule(anv_device_to_handle(device), fsm); + anv_DestroyShader(anv_device_to_handle(device), fs); +} + +static void +meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_saved_state *saved_state) +{ + struct anv_device *device = cmd_buffer->device; + + anv_cmd_buffer_save(cmd_buffer, saved_state); + + if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(device->meta_state.blit.pipeline)) + anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline); + + /* We don't need anything here, only set if not already set. 
*/ + if (cmd_buffer->state.rs_state == NULL) + anv_CmdBindDynamicRasterState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.rs_state); + if (cmd_buffer->state.ds_state == NULL) + anv_CmdBindDynamicDepthStencilState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.ds_state); + + saved_state->cb_state = anv_dynamic_cb_state_to_handle(cmd_buffer->state.cb_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + device->meta_state.shared.cb_state); +} + +struct blit_region { + VkOffset3D src_offset; + VkExtent3D src_extent; + VkOffset3D dest_offset; + VkExtent3D dest_extent; +}; + +static void +meta_emit_blit(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *src, + VkOffset3D src_offset, + VkExtent3D src_extent, + struct anv_color_attachment_view *dest, + VkOffset3D dest_offset, + VkExtent3D dest_extent) +{ + struct anv_device *device = cmd_buffer->device; + VkDescriptorPool dummy_desc_pool = { .handle = 1 }; + + struct blit_vb_data { + float pos[2]; + float tex_coord[2]; + } *vb_data; + + unsigned vb_size = sizeof(struct vue_header) + 3 * sizeof(*vb_data); + + struct anv_state vb_state = + anv_state_stream_alloc(&cmd_buffer->surface_state_stream, vb_size, 16); + memset(vb_state.map, 0, sizeof(struct vue_header)); + vb_data = vb_state.map + sizeof(struct vue_header); + + vb_data[0] = (struct blit_vb_data) { + .pos = { + dest_offset.x + dest_extent.width, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)(src_offset.x + src_extent.width) / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[1] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + dest_offset.y + dest_extent.height, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)(src_offset.y + src_extent.height) / (float)src->extent.height, + }, + }; + + vb_data[2] = (struct blit_vb_data) { + .pos = { + dest_offset.x, + 
dest_offset.y, + }, + .tex_coord = { + (float)src_offset.x / (float)src->extent.width, + (float)src_offset.y / (float)src->extent.height, + }, + }; + + struct anv_buffer vertex_buffer = { + .device = device, + .size = vb_size, + .bo = &device->surface_state_block_pool.bo, + .offset = vb_state.offset, + }; + + anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2, + (VkBuffer[]) { + anv_buffer_to_handle(&vertex_buffer), + anv_buffer_to_handle(&vertex_buffer) + }, + (VkDeviceSize[]) { + 0, + sizeof(struct vue_header), + }); + + uint32_t count; + VkDescriptorSet set; + anv_AllocDescriptorSets(anv_device_to_handle(device), dummy_desc_pool, + VK_DESCRIPTOR_SET_USAGE_ONE_SHOT, + 1, &device->meta_state.blit.ds_layout, &set, &count); + anv_UpdateDescriptorSets(anv_device_to_handle(device), + 1, /* writeCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .destSet = set, + .destBinding = 0, + .destArrayElement = 0, + .count = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pDescriptors = (VkDescriptorInfo[]) { + { + .imageView = anv_image_view_to_handle(src), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }, 0, NULL); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { + { + .view = anv_attachment_view_to_handle(&dest->base), + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .width = dest->base.extent.width, + .height = dest->base.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = dest->view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + 
.storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = pass, + .framebuffer = fb, + .renderArea = { + .offset = { dest_offset.x, dest_offset.y }, + .extent = { dest_extent.width, dest_extent.height }, + }, + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + anv_CmdBindDynamicViewportState(anv_cmd_buffer_to_handle(cmd_buffer), + anv_framebuffer_from_handle(fb)->vp_state); + + anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_GRAPHICS, + device->meta_state.blit.pipeline_layout, 0, 1, + &set, 0, NULL); + + anv_CmdDraw(anv_cmd_buffer_to_handle(cmd_buffer), 0, 3, 0, 1); + + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); + + /* At the point where we emit the draw call, all data from the + * descriptor sets, etc. has been used. We are free to delete it. 
+ */ + anv_descriptor_set_destroy(device, anv_descriptor_set_from_handle(set)); + anv_DestroyFramebuffer(anv_device_to_handle(device), fb); + anv_DestroyRenderPass(anv_device_to_handle(device), pass); +} + +static void +meta_finish_blit(struct anv_cmd_buffer *cmd_buffer, + const struct anv_saved_state *saved_state) +{ + anv_cmd_buffer_restore(cmd_buffer, saved_state); + anv_CmdBindDynamicColorBlendState(anv_cmd_buffer_to_handle(cmd_buffer), + saved_state->cb_state); +} + +static VkFormat +vk_format_for_cpp(int cpp) +{ + switch (cpp) { + case 1: return VK_FORMAT_R8_UINT; + case 2: return VK_FORMAT_R8G8_UINT; + case 3: return VK_FORMAT_R8G8B8_UINT; + case 4: return VK_FORMAT_R8G8B8A8_UINT; + case 6: return VK_FORMAT_R16G16B16_UINT; + case 8: return VK_FORMAT_R16G16B16A16_UINT; + case 12: return VK_FORMAT_R32G32B32_UINT; + case 16: return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Invalid format cpp"); + } +} + +static void +do_buffer_copy(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *src, uint64_t src_offset, + struct anv_bo *dest, uint64_t dest_offset, + int width, int height, VkFormat copy_format) +{ + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = copy_format, + .extent = { + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }; + + VkImage src_image, dest_image; + anv_CreateImage(vk_device, &image_info, &src_image); + anv_CreateImage(vk_device, &image_info, &dest_image); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. 
+ */ + anv_image_from_handle(src_image)->bo = src; + anv_image_from_handle(src_image)->offset = src_offset; + anv_image_from_handle(dest_image)->bo = dest; + anv_image_from_handle(dest_image)->offset = dest_offset; + + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = src_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = copy_format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = VK_IMAGE_ASPECT_COLOR, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = dest_image, + .format = copy_format, + .mipLevel = 0, + .baseArraySlice = 0, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }, + &dest_view, + (VkOffset3D) { 0, 0, 0 }, + (VkExtent3D) { width, height, 1 }); + + anv_DestroyImage(vk_device, src_image); + anv_DestroyImage(vk_device, dest_image); +} +
+/**
+ * Copy regions between two buffers by drawing them with the meta blit
+ * pipeline.
+ *
+ * Every region is reinterpreted as one or more 2D surfaces of a UINT format
+ * whose texel size (cpp) is the largest power of two, at most 16 bytes, that
+ * divides the source offset, the destination offset and the copy size.  The
+ * bytes are then moved by do_buffer_copy() in three steps: as many
+ * max_surface_dim x max_surface_dim surfaces as fit, one full-width surface
+ * for the remaining whole rows, and a single-row surface for the tail.
+ *
+ * The work is bracketed by meta_prepare_blit() / meta_finish_blit(), which
+ * save and restore the command buffer state the blit overrides.
+ *
+ * \param cmdBuffer    Command buffer the copies are recorded into.
+ * \param srcBuffer    Buffer to read from.
+ * \param destBuffer   Buffer to write to.
+ * \param regionCount  Number of entries in pRegions.
+ * \param pRegions     Source offset, destination offset and size of each
+ *                     region; the offsets are relative to the buffers.
+ */
+void anv_CmdCopyBuffer(
+    VkCmdBuffer                                 cmdBuffer,
+    VkBuffer                                    srcBuffer,
+    VkBuffer                                    destBuffer,
+    uint32_t                                    regionCount,
+    const VkBufferCopy*                         pRegions)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
+   ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
+
+   /* This is maximum possible width/height our HW can handle */
+   const uint64_t max_surface_dim = 1 << 14;
+
+   struct anv_saved_state saved_state;
+
+   meta_prepare_blit(cmd_buffer, &saved_state);
+
+   for (unsigned r = 0; r < regionCount; r++) {
+      uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
+      uint64_t dest_offset = dest_buffer->offset + pRegions[r].destOffset;
+      uint64_t copy_size = pRegions[r].copySize;
+
+      /* First, we compute the biggest format that can be used with the
+       * given offsets and size.  OR-ing the three values together leaves
+       * the lowest set bit of the least aligned one, so halving cpp until
+       * it divides that value yields the common alignment capped at 16.
+       *
+       * This is done on the full 64-bit values: ffs() takes an int, and
+       * "1 << fs" overflows a signed int once the lowest set bit is bit 31,
+       * which produced a negative cpp.
+       */
+      const uint64_t align_bits = src_offset | dest_offset | copy_size;
+      int cpp = 16;
+      while (align_bits & (uint64_t)(cpp - 1))
+         cpp >>= 1;
+      assert(src_offset % cpp == 0);
+      assert(dest_offset % cpp == 0);
+      assert(copy_size % cpp == 0);
+
+      VkFormat copy_format = vk_format_for_cpp(cpp);
+
+      /* First, we make a bunch of max-sized copies.  The comparison is
+       * inclusive so that a remainder of exactly one max-sized surface is
+       * consumed here; otherwise the row computation below would yield
+       * height == max_surface_dim and trip its assertion.
+       */
+      uint64_t max_copy_size = max_surface_dim * max_surface_dim * cpp;
+      while (copy_size >= max_copy_size) {
+         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+                        dest_buffer->bo, dest_offset,
+                        max_surface_dim, max_surface_dim, copy_format);
+         copy_size -= max_copy_size;
+         src_offset += max_copy_size;
+         dest_offset += max_copy_size;
+      }
+
+      /* Next, one full-width rectangle covering the remaining whole rows */
+      uint64_t height = copy_size / (max_surface_dim * cpp);
+      assert(height < max_surface_dim);
+      if (height != 0) {
+         uint64_t rect_copy_size = height * max_surface_dim * cpp;
+         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+                        dest_buffer->bo, dest_offset,
+                        max_surface_dim, height, copy_format);
+         copy_size -= rect_copy_size;
+         src_offset += rect_copy_size;
+         dest_offset += rect_copy_size;
+      }
+
+      /* Finally, whatever is left is shorter than one row */
+      if (copy_size != 0) {
+         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
+                        dest_buffer->bo, dest_offset,
+                        copy_size / cpp, 1, copy_format);
+      }
+   }
+
+   meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdCopyImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + + struct
anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = src_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + pRegions[r].srcOffset, + pRegions[r].extent, + &dest_view, + pRegions[r].destOffset, + pRegions[r].extent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdBlitImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit* pRegions, + VkTexFilter filter) + +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + + struct anv_saved_state saved_state; + + anv_finishme("respect VkTexFilter"); + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + struct anv_image_view src_view; + 
anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = srcImage, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = src_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].srcSubresource.aspect, + .baseMipLevel = pRegions[r].srcSubresource.mipLevel, + .mipLevels = 1, + .baseArraySlice = pRegions[r].srcSubresource.arraySlice, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = destImage, + .format = dest_image->format, + .mipLevel = pRegions[r].destSubresource.mipLevel, + .baseArraySlice = pRegions[r].destSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + pRegions[r].srcOffset, + pRegions[r].srcExtent, + &dest_view, + pRegions[r].destOffset, + pRegions[r].destExtent); + } + + meta_finish_blit(cmd_buffer, &saved_state); +} + +void anv_CmdCopyBufferToImage( + VkCmdBuffer cmdBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy* pRegions) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); + ANV_FROM_HANDLE(anv_image, dest_image, destImage); + VkDevice vk_device = anv_device_to_handle(cmd_buffer->device); + struct anv_saved_state saved_state; + + meta_prepare_blit(cmd_buffer, &saved_state); + + for (unsigned r = 0; r < regionCount; r++) { + if (pRegions[r].bufferRowLength != 0) + anv_finishme("bufferRowLength not supported in CopyBufferToImage"); + if (pRegions[r].bufferImageHeight != 0) + anv_finishme("bufferImageHeight not supported in CopyBufferToImage"); + 
+ VkImage srcImage; + anv_CreateImage(vk_device, + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = dest_image->format, + .extent = { + .width = pRegions[r].imageExtent.width, + .height = pRegions[r].imageExtent.height, + .depth = 1, + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT, + .flags = 0, + }, &srcImage); + + ANV_FROM_HANDLE(anv_image, src_image, srcImage); + + /* We could use a vk call to bind memory, but that would require + * creating a dummy memory object etc. so there's really no point. + */ + src_image->bo = src_buffer->bo; + src_image->offset = src_buffer->offset + pRegions[r].bufferOffset; + + struct anv_image_view src_view; + anv_image_view_init(&src_view, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = anv_image_to_handle(src_image), + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = dest_image->format, + .channels = { + VK_CHANNEL_SWIZZLE_R, + VK_CHANNEL_SWIZZLE_G, + VK_CHANNEL_SWIZZLE_B, + VK_CHANNEL_SWIZZLE_A + }, + .subresourceRange = { + .aspect = pRegions[r].imageSubresource.aspect, + .baseMipLevel = 0, + .mipLevels = 1, + .baseArraySlice = 0, + .arraySize = 1 + }, + }, + cmd_buffer); + + struct anv_color_attachment_view dest_view; + anv_color_attachment_view_init(&dest_view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = anv_image_to_handle(dest_image), + .format = dest_image->format, + .mipLevel = pRegions[r].imageSubresource.mipLevel, + .baseArraySlice = pRegions[r].imageSubresource.arraySlice, + .arraySize = 1, + }, + cmd_buffer); + + meta_emit_blit(cmd_buffer, + &src_view, + (VkOffset3D) { 0, 0, 0 }, + pRegions[r].imageExtent, + &dest_view, + pRegions[r].imageOffset, + pRegions[r].imageExtent); + + anv_DestroyImage(vk_device, srcImage); + } + + 
meta_finish_blit(cmd_buffer, &saved_state); +} +
+/**
+ * Copy regions of an image into a buffer using the meta blit pipeline.
+ *
+ * For each region a temporary linear 2D image with the source image's format
+ * and the region's extent is created and pointed directly at the
+ * destination buffer's storage; the region is then blitted from the source
+ * image into it and the temporary image is destroyed.
+ *
+ * bufferRowLength and bufferImageHeight are not honoured yet: a non-zero
+ * value only produces an anv_finishme() diagnostic.
+ *
+ * \param cmdBuffer       Command buffer the copies are recorded into.
+ * \param srcImage        Image to read from.
+ * \param srcImageLayout  Not used by this implementation.
+ * \param destBuffer      Buffer to write to.
+ * \param regionCount     Number of entries in pRegions.
+ * \param pRegions        Image subresource, offset and extent plus the
+ *                        buffer offset of each region.
+ */
+void anv_CmdCopyImageToBuffer(
+    VkCmdBuffer                                 cmdBuffer,
+    VkImage                                     srcImage,
+    VkImageLayout                               srcImageLayout,
+    VkBuffer                                    destBuffer,
+    uint32_t                                    regionCount,
+    const VkBufferImageCopy*                    pRegions)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
+   ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
+   VkDevice vk_device = anv_device_to_handle(cmd_buffer->device);
+   struct anv_saved_state saved_state;
+
+   meta_prepare_blit(cmd_buffer, &saved_state);
+
+   for (unsigned r = 0; r < regionCount; r++) {
+      /* Name this entry point in the diagnostics; they previously carried
+       * the text of the CopyBufferToImage sibling.
+       */
+      if (pRegions[r].bufferRowLength != 0)
+         anv_finishme("bufferRowLength not supported in CopyImageToBuffer");
+      if (pRegions[r].bufferImageHeight != 0)
+         anv_finishme("bufferImageHeight not supported in CopyImageToBuffer");
+
+      struct anv_image_view src_view;
+      anv_image_view_init(&src_view, cmd_buffer->device,
+         &(VkImageViewCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+            .image = srcImage,
+            .viewType = VK_IMAGE_VIEW_TYPE_2D,
+            .format = src_image->format,
+            .channels = {
+               VK_CHANNEL_SWIZZLE_R,
+               VK_CHANNEL_SWIZZLE_G,
+               VK_CHANNEL_SWIZZLE_B,
+               VK_CHANNEL_SWIZZLE_A
+            },
+            .subresourceRange = {
+               .aspect = pRegions[r].imageSubresource.aspect,
+               .baseMipLevel = pRegions[r].imageSubresource.mipLevel,
+               .mipLevels = 1,
+               .baseArraySlice = pRegions[r].imageSubresource.arraySlice,
+               .arraySize = 1
+            },
+         },
+         cmd_buffer);
+
+      VkImage destImage;
+      anv_CreateImage(vk_device,
+         &(VkImageCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .imageType = VK_IMAGE_TYPE_2D,
+            .format = src_image->format,
+            .extent = {
+               .width = pRegions[r].imageExtent.width,
+               .height = pRegions[r].imageExtent.height,
+               .depth = 1,
+            },
+            .mipLevels = 1,
+            .arraySize = 1,
+            .samples = 1,
+            .tiling = VK_IMAGE_TILING_LINEAR,
+            .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
+            .flags = 0,
+         }, &destImage);
+
+      ANV_FROM_HANDLE(anv_image, dest_image, destImage);
+
+      /* We could use a vk call to bind memory, but that would require
+       * creating a dummy memory object etc. so there's really no point.
+       */
+      dest_image->bo = dest_buffer->bo;
+      dest_image->offset = dest_buffer->offset + pRegions[r].bufferOffset;
+
+      struct anv_color_attachment_view dest_view;
+      anv_color_attachment_view_init(&dest_view, cmd_buffer->device,
+         &(VkAttachmentViewCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO,
+            .image = destImage,
+            .format = src_image->format,
+            .mipLevel = 0,
+            .baseArraySlice = 0,
+            .arraySize = 1,
+         },
+         cmd_buffer);
+
+      meta_emit_blit(cmd_buffer,
+                     &src_view,
+                     pRegions[r].imageOffset,
+                     pRegions[r].imageExtent,
+                     &dest_view,
+                     (VkOffset3D) { 0, 0, 0 },
+                     pRegions[r].imageExtent);
+
+      anv_DestroyImage(vk_device, destImage);
+   }
+
+   meta_finish_blit(cmd_buffer, &saved_state);
+}
+
+void anv_CmdUpdateBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize dataSize, + const uint32_t* pData) +{ + stub(); +} + +void anv_CmdFillBuffer( + VkCmdBuffer cmdBuffer, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + stub(); +} + +void anv_CmdClearColorImage( + VkCmdBuffer cmdBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_image, image, _image); + struct anv_saved_state saved_state; + + anv_cmd_buffer_save(cmd_buffer, &saved_state); + + for (uint32_t r = 0; r < rangeCount; r++) { + for (uint32_t l = 0; l < pRanges[r].mipLevels; l++) { + for (uint32_t s = 0; s < pRanges[r].arraySize; s++) { + struct anv_color_attachment_view view; + anv_color_attachment_view_init(&view, cmd_buffer->device, + &(VkAttachmentViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_VIEW_CREATE_INFO, + .image = _image, + .format = image->format, + .mipLevel =
pRanges[r].baseMipLevel + l, + .baseArraySlice = pRanges[r].baseArraySlice + s, + .arraySize = 1, + }, + cmd_buffer); + + VkFramebuffer fb; + anv_CreateFramebuffer(anv_device_to_handle(cmd_buffer->device), + &(VkFramebufferCreateInfo) { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkAttachmentBindInfo[]) { + { + .view = anv_attachment_view_to_handle(&view.base), + .layout = VK_IMAGE_LAYOUT_GENERAL + } + }, + .width = view.base.extent.width, + .height = view.base.extent.height, + .layers = 1 + }, &fb); + + VkRenderPass pass; + anv_CreateRenderPass(anv_device_to_handle(cmd_buffer->device), + &(VkRenderPassCreateInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION, + .format = view.view.format, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputCount = 0, + .colorCount = 1, + .colorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .resolveAttachments = NULL, + .depthStencilAttachment = (VkAttachmentReference) { + .attachment = VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + .preserveCount = 1, + .preserveAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, + }, + .dependencyCount = 0, + }, &pass); + + anv_CmdBeginRenderPass(anv_cmd_buffer_to_handle(cmd_buffer), + &(VkRenderPassBeginInfo) { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderArea = { + .offset = { 0, 0, }, + .extent = { + .width = view.base.extent.width, + .height = view.base.extent.height, + }, + 
}, + .renderPass = pass, + .framebuffer = fb, + .attachmentCount = 1, + .pAttachmentClearValues = NULL, + }, VK_RENDER_PASS_CONTENTS_INLINE); + + struct clear_instance_data instance_data = { + .vue_header = { + .RTAIndex = 0, + .ViewportIndex = 0, + .PointWidth = 0.0 + }, + .color = *pColor, + }; + + meta_emit_clear(cmd_buffer, 1, &instance_data); + + anv_CmdEndRenderPass(anv_cmd_buffer_to_handle(cmd_buffer)); /* NOTE(review): the framebuffer and render pass created for this slice are not destroyed anywhere in this function - confirm whether they are released elsewhere or leaked. */ + } + } + } + + /* Restore API state */ + anv_cmd_buffer_restore(cmd_buffer, &saved_state); +} + /* The four entry points below are not implemented yet; each one only calls stub(). */ +void anv_CmdClearDepthStencilImage( + VkCmdBuffer cmdBuffer, + VkImage image, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rangeCount, + const VkImageSubresourceRange* pRanges) +{ + stub(); +} + +void anv_CmdClearColorAttachment( + VkCmdBuffer cmdBuffer, + uint32_t colorAttachment, + VkImageLayout imageLayout, + const VkClearColorValue* pColor, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdClearDepthStencilAttachment( + VkCmdBuffer cmdBuffer, + VkImageAspectFlags imageAspectMask, + VkImageLayout imageLayout, + float depth, + uint32_t stencil, + uint32_t rectCount, + const VkRect3D* pRects) +{ + stub(); +} + +void anv_CmdResolveImage( + VkCmdBuffer cmdBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageResolve* pRegions) +{ + stub(); +} + /* Initialise the meta clear and blit state, then create the dynamic raster, colour-blend and depth-stencil state objects stored in device->meta_state.shared. The results of the create calls are not checked. */ +void +anv_device_init_meta(struct anv_device *device) +{ + anv_device_init_meta_clear_state(device); + anv_device_init_meta_blit_state(device); + + anv_CreateDynamicRasterState(anv_device_to_handle(device), + &(VkDynamicRasterStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO, + }, + &device->meta_state.shared.rs_state); + + anv_CreateDynamicColorBlendState(anv_device_to_handle(device), + &(VkDynamicColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO + }, + &device->meta_state.shared.cb_state); 
+ + anv_CreateDynamicDepthStencilState(anv_device_to_handle(device), + &(VkDynamicDepthStencilStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO + }, + &device->meta_state.shared.ds_state); +} + /* Destroy the objects held in device->meta_state: the clear pipeline, the blit pipeline with its pipeline layout and descriptor set layout, and the three shared dynamic state objects. */ +void +anv_device_finish_meta(struct anv_device *device) +{ + /* Clear */ + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.clear.pipeline); + + /* Blit */ + anv_DestroyPipeline(anv_device_to_handle(device), + device->meta_state.blit.pipeline); + anv_DestroyPipelineLayout(anv_device_to_handle(device), + device->meta_state.blit.pipeline_layout); + anv_DestroyDescriptorSetLayout(anv_device_to_handle(device), + device->meta_state.blit.ds_layout); + + /* Shared */ + anv_DestroyDynamicRasterState(anv_device_to_handle(device), + device->meta_state.shared.rs_state); + anv_DestroyDynamicColorBlendState(anv_device_to_handle(device), + device->meta_state.shared.cb_state); + anv_DestroyDynamicDepthStencilState(anv_device_to_handle(device), + device->meta_state.shared.ds_state); +} diff --git a/src/vulkan/anv_pipeline.c b/src/vulkan/anv_pipeline.c new file mode 100644 index 00000000000..3c9c14193de --- /dev/null +++ b/src/vulkan/anv_pipeline.c @@ -0,0 +1,950 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +// Shader functions + /* Allocate an anv_shader_module followed by codeSize bytes and copy pCode into module->data. Returns VK_ERROR_OUT_OF_HOST_MEMORY when the allocation fails, VK_SUCCESS otherwise. */ +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + /* Free the shader module allocation through anv_device_free; always returns VK_SUCCESS. */ +VkResult anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_device_free(device, module); + + return VK_SUCCESS; +} + /* Create an anv_shader that points at the given module and carries its own copy of the entry point name ("main" when pName is NULL). */ +VkResult anv_CreateShader( + VkDevice _device, + const VkShaderCreateInfo* pCreateInfo, + VkShader* pShader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); + struct anv_shader *shader; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + const char 
*name = pCreateInfo->pName ? pCreateInfo->pName : "main"; + size_t name_len = strlen(name); + + if (strcmp(name, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + /* The entry point name, including its terminating NUL, is stored in the same allocation right after the struct. */ + shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (shader == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + shader->module = module; + memcpy(shader->entrypoint, name, name_len + 1); + + *pShader = anv_shader_to_handle(shader); + + return VK_SUCCESS; +} + /* Free the shader object itself; the module it references is not touched here. */ +VkResult anv_DestroyShader( + VkDevice _device, + VkShader _shader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader, shader, _shader); + + anv_device_free(device, shader); + + return VK_SUCCESS; +} + /* Pipeline caches are not implemented: no object is allocated and the returned handle is the constant value 1. */ + +VkResult anv_CreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + VkPipelineCache* pPipelineCache) +{ + pPipelineCache->handle = 1; + + stub_return(VK_SUCCESS); +} + +VkResult anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache) +{ + /* VkPipelineCache is a dummy object. 
*/ + return VK_SUCCESS; +} + /* The remaining pipeline cache entry points are stubs: the size query reports 0 and the data/merge calls return VK_UNSUPPORTED. */ +size_t anv_GetPipelineCacheSize( + VkDevice device, + VkPipelineCache pipelineCache) +{ + stub_return(0); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + void* pData) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_UNSUPPORTED); +} + +// Pipeline functions + /* Record the used vertex buffer bindings and their strides in the pipeline and emit the vertex element and per-element instancing packets for every vertex attribute. */ +static void +emit_vertex_input(struct anv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *info) +{ + const uint32_t num_dwords = 1 + info->attributeCount * 2; + uint32_t *p; + /* Zero-initialised so that an attribute whose binding has no binding description reads "not instanced" instead of an indeterminate value. */ + bool instancing_enable[32] = { false }; + + pipeline->vb_used = 0; + for (uint32_t i = 0; i < info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. Set up a map of which bindings step per instance, for + * reference by vertex element setup. 
*/ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + instancing_enable[desc->binding] = true; + break; + } + } + + p = anv_batch_emitn(&pipeline->batch, num_dwords, + GEN8_3DSTATE_VERTEX_ELEMENTS); + + for (uint32_t i = 0; i < info->attributeCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + const struct anv_format *format = anv_format_for_vk_format(desc->format); + + struct GEN8_VERTEX_ELEMENT_STATE element = { + .VertexBufferIndex = desc->binding, + .Valid = true, + .SourceElementFormat = format->surface_format, + .EdgeFlagEnable = false, + .SourceElementOffset = desc->offsetInBytes, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = format->num_channels >= 2 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component2Control = format->num_channels >= 3 ? VFCOMP_STORE_SRC : VFCOMP_STORE_0, + .Component3Control = format->num_channels >= 4 ? VFCOMP_STORE_SRC : VFCOMP_STORE_1_FP + }; + GEN8_VERTEX_ELEMENT_STATE_pack(NULL, &p[1 + i * 2], &element); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_INSTANCING, + .InstancingEnable = instancing_enable[desc->binding], + .VertexElementIndex = i, + /* Vulkan so far doesn't have an instance divisor, so + * this is always 1 (ignored if not instancing). 
*/ + .InstanceDataStepRate = 1); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_SGVS, + .VertexIDEnable = pipeline->vs_prog_data.uses_vertexid, + .VertexIDComponentNumber = 2, + .VertexIDElementOffset = info->bindingCount, + .InstanceIDEnable = pipeline->vs_prog_data.uses_instanceid, + .InstanceIDComponentNumber = 3, + .InstanceIDElementOffset = info->bindingCount); +} + +static void +emit_ia_state(struct anv_pipeline *pipeline, + const VkPipelineInputAssemblyStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 + }; + uint32_t topology = vk_to_gen_primitive_type[info->topology]; + + if (extra && extra->use_rectlist) + topology = _3DPRIM_RECTLIST; + + struct GEN8_3DSTATE_VF vf = { + GEN8_3DSTATE_VF_header, + .IndexedDrawCutIndexEnable = info->primitiveRestartEnable, + }; + GEN8_3DSTATE_VF_pack(NULL, pipeline->state_vf, &vf); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_TOPOLOGY, + .PrimitiveTopologyType = topology); +} + +static void +emit_rs_state(struct anv_pipeline *pipeline, + const VkPipelineRasterStateCreateInfo *info, + const struct anv_pipeline_create_info *extra) +{ + static const uint32_t vk_to_gen_cullmode[] = { + [VK_CULL_MODE_NONE] = CULLMODE_NONE, + [VK_CULL_MODE_FRONT] = CULLMODE_FRONT, + 
[VK_CULL_MODE_BACK] = CULLMODE_BACK, + [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH + }; + + static const uint32_t vk_to_gen_fillmode[] = { + [VK_FILL_MODE_POINTS] = RASTER_POINT, + [VK_FILL_MODE_WIREFRAME] = RASTER_WIREFRAME, + [VK_FILL_MODE_SOLID] = RASTER_SOLID + }; + + static const uint32_t vk_to_gen_front_face[] = { + [VK_FRONT_FACE_CCW] = CounterClockwise, + [VK_FRONT_FACE_CW] = Clockwise + }; + + struct GEN8_3DSTATE_SF sf = { + GEN8_3DSTATE_SF_header, + .ViewportTransformEnable = !(extra && extra->disable_viewport), + .TriangleStripListProvokingVertexSelect = 0, + .LineStripListProvokingVertexSelect = 0, + .TriangleFanProvokingVertexSelect = 0, + .PointWidthSource = pipeline->writes_point_size ? Vertex : State, + .PointWidth = 1.0, + }; + + /* FINISHME: VkBool32 rasterizerDiscardEnable; */ + + GEN8_3DSTATE_SF_pack(NULL, pipeline->state_sf, &sf); + + struct GEN8_3DSTATE_RASTER raster = { + GEN8_3DSTATE_RASTER_header, + .FrontWinding = vk_to_gen_front_face[info->frontFace], + .CullMode = vk_to_gen_cullmode[info->cullMode], + .FrontFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .BackFaceFillMode = vk_to_gen_fillmode[info->fillMode], + .ScissorRectangleEnable = !(extra && extra->disable_scissor), + .ViewportZClipTestEnable = info->depthClipEnable + }; + + GEN8_3DSTATE_RASTER_pack(NULL, pipeline->state_raster, &raster); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE, + .ForceVertexURBEntryReadLength = false, + .ForceVertexURBEntryReadOffset = false, + .PointSpriteTextureCoordinateOrigin = UPPERLEFT, + .NumberofSFOutputAttributes = + pipeline->wm_prog_data.num_varying_inputs); + +} + +static void +emit_cb_state(struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *info) +{ + struct anv_device *device = pipeline->device; + + static const uint32_t vk_to_gen_logic_op[] = { + [VK_LOGIC_OP_COPY] = LOGICOP_COPY, + [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR, + [VK_LOGIC_OP_AND] = LOGICOP_AND, + [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE, 
+ [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED, + [VK_LOGIC_OP_NOOP] = LOGICOP_NOOP, + [VK_LOGIC_OP_XOR] = LOGICOP_XOR, + [VK_LOGIC_OP_OR] = LOGICOP_OR, + [VK_LOGIC_OP_NOR] = LOGICOP_NOR, + [VK_LOGIC_OP_EQUIV] = LOGICOP_EQUIV, + [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT, + [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE, + [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED, + [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED, + [VK_LOGIC_OP_NAND] = LOGICOP_NAND, + [VK_LOGIC_OP_SET] = LOGICOP_SET, + }; + + static const uint32_t vk_to_gen_blend[] = { + [VK_BLEND_ZERO] = BLENDFACTOR_ZERO, + [VK_BLEND_ONE] = BLENDFACTOR_ONE, + [VK_BLEND_SRC_COLOR] = BLENDFACTOR_SRC_COLOR, + [VK_BLEND_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR, + [VK_BLEND_DEST_COLOR] = BLENDFACTOR_DST_COLOR, + [VK_BLEND_ONE_MINUS_DEST_COLOR] = BLENDFACTOR_INV_DST_COLOR, + [VK_BLEND_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA, + [VK_BLEND_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA, + [VK_BLEND_DEST_ALPHA] = BLENDFACTOR_DST_ALPHA, + [VK_BLEND_ONE_MINUS_DEST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA, + [VK_BLEND_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR, + [VK_BLEND_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR, + [VK_BLEND_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA, + [VK_BLEND_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA, + [VK_BLEND_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE, + [VK_BLEND_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR, + [VK_BLEND_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR, + [VK_BLEND_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA, + [VK_BLEND_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA, + }; + + static const uint32_t vk_to_gen_blend_op[] = { + [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD, + [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT, + [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT, + [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN, + [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX, + }; + + uint32_t num_dwords = GEN8_BLEND_STATE_length; + pipeline->blend_state = + 
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); + + struct GEN8_BLEND_STATE blend_state = { + .AlphaToCoverageEnable = info->alphaToCoverageEnable, + }; + + for (uint32_t i = 0; i < info->attachmentCount; i++) { + const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[i]; + + blend_state.Entry[i] = (struct GEN8_BLEND_STATE_ENTRY) { + .LogicOpEnable = info->logicOpEnable, + .LogicOpFunction = vk_to_gen_logic_op[info->logicOp], + .ColorBufferBlendEnable = a->blendEnable, + .PreBlendSourceOnlyClampEnable = false, + .PreBlendColorClampEnable = false, + .PostBlendColorClampEnable = false, + .SourceBlendFactor = vk_to_gen_blend[a->srcBlendColor], + .DestinationBlendFactor = vk_to_gen_blend[a->destBlendColor], + .ColorBlendFunction = vk_to_gen_blend_op[a->blendOpColor], + .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcBlendAlpha], + .DestinationAlphaBlendFactor = vk_to_gen_blend[a->destBlendAlpha], + .AlphaBlendFunction = vk_to_gen_blend_op[a->blendOpAlpha], + .WriteDisableAlpha = !(a->channelWriteMask & VK_CHANNEL_A_BIT), + .WriteDisableRed = !(a->channelWriteMask & VK_CHANNEL_R_BIT), + .WriteDisableGreen = !(a->channelWriteMask & VK_CHANNEL_G_BIT), + .WriteDisableBlue = !(a->channelWriteMask & VK_CHANNEL_B_BIT), + }; + } + + GEN8_BLEND_STATE_pack(NULL, pipeline->blend_state.map, &blend_state); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_BLEND_STATE_POINTERS, + .BlendStatePointer = pipeline->blend_state.offset, + .BlendStatePointerValid = true); +} + +static const uint32_t vk_to_gen_compare_op[] = { + [VK_COMPARE_OP_NEVER] = COMPAREFUNCTION_NEVER, + [VK_COMPARE_OP_LESS] = COMPAREFUNCTION_LESS, + [VK_COMPARE_OP_EQUAL] = COMPAREFUNCTION_EQUAL, + [VK_COMPARE_OP_LESS_EQUAL] = COMPAREFUNCTION_LEQUAL, + [VK_COMPARE_OP_GREATER] = COMPAREFUNCTION_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = COMPAREFUNCTION_NOTEQUAL, + [VK_COMPARE_OP_GREATER_EQUAL] = COMPAREFUNCTION_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = COMPAREFUNCTION_ALWAYS, +}; + +static const 
uint32_t vk_to_gen_stencil_op[] = { + [VK_STENCIL_OP_KEEP] = 0, + [VK_STENCIL_OP_ZERO] = 0, + [VK_STENCIL_OP_REPLACE] = 0, + [VK_STENCIL_OP_INC_CLAMP] = 0, + [VK_STENCIL_OP_DEC_CLAMP] = 0, + [VK_STENCIL_OP_INVERT] = 0, + [VK_STENCIL_OP_INC_WRAP] = 0, + [VK_STENCIL_OP_DEC_WRAP] = 0 +}; + /* NOTE(review): every entry of vk_to_gen_stencil_op is 0, so all Vulkan stencil ops currently translate to the same hardware value - this looks like a placeholder; confirm the intended GEN8 encodings. emit_ds_state packs the pipeline's static depth/stencil settings into pipeline->state_wm_depth_stencil; when info is NULL that buffer is zeroed instead. */ +static void +emit_ds_state(struct anv_pipeline *pipeline, + const VkPipelineDepthStencilStateCreateInfo *info) +{ + if (info == NULL) { + /* We're going to OR this together with the dynamic state. We need + * to make sure it's initialized to something useful. + */ + memset(pipeline->state_wm_depth_stencil, 0, + sizeof(pipeline->state_wm_depth_stencil)); + return; + } + + /* VkBool32 depthBoundsEnable; // optional (depth_bounds_test) */ + + struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = { + .DepthTestEnable = info->depthTestEnable, + .DepthBufferWriteEnable = info->depthWriteEnable, + .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp], + .DoubleSidedStencilEnable = true, + + .StencilTestEnable = info->stencilTestEnable, + .StencilFailOp = vk_to_gen_stencil_op[info->front.stencilFailOp], + .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.stencilPassOp], + .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.stencilDepthFailOp], + .StencilTestFunction = vk_to_gen_compare_op[info->front.stencilCompareOp], + .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.stencilFailOp], + .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.stencilPassOp], + .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info->back.stencilDepthFailOp], + .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.stencilCompareOp], + }; + + GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, pipeline->state_wm_depth_stencil, &wm_depth_stencil); +} + /* Build a graphics pipeline from pCreateInfo; `extra` is optional (it is NULL-checked wherever it is used) and carries driver-internal overrides. */ +VkResult +anv_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const struct anv_pipeline_create_info * extra, + VkPipeline* pPipeline) +{ + ANV_FROM_HANDLE(anv_device, 
device, _device); + struct anv_pipeline *pipeline; + VkResult result; + uint32_t offset, length; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + result = anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + pipeline->shaders[pCreateInfo->pStages[i].stage] = + anv_shader_from_handle(pCreateInfo->pStages[i].shader); + } + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if (pCreateInfo->pViewportState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO"); + if (pCreateInfo->pMultisampleState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + + anv_compiler_run(device->compiler, pipeline); + + /* FIXME: The compiler dead-codes FS inputs when we don't have a VS, so we + * hard code this to num_attributes - 2. This is because the attributes + * include VUE header and position, which aren't counted as varying + * inputs. 
*/ + /* Check the vertex input state before its first dereference in the branch below. */ + assert(pCreateInfo->pVertexInputState); + if (pipeline->vs_simd8 == NO_KERNEL) { + pipeline->wm_prog_data.num_varying_inputs = + pCreateInfo->pVertexInputState->attributeCount - 2; + } + + emit_vertex_input(pipeline, pCreateInfo->pVertexInputState); + assert(pCreateInfo->pInputAssemblyState); + emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra); + assert(pCreateInfo->pRasterState); + emit_rs_state(pipeline, pCreateInfo->pRasterState, extra); + emit_ds_state(pipeline, pCreateInfo->pDepthStencilState); + emit_cb_state(pipeline, pCreateInfo->pColorBlendState); + /* Tessellation (HS/TE/DS) and stream output are always emitted as disabled. */ + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VF_STATISTICS, + .StatisticsEnable = true); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_HS, .Enable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_TE, .TEEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_DS, .FunctionEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_STREAMOUT, .SOFunctionEnable = false); + /* Push constant space: equal-sized allocations for VS, GS and PS at consecutive offsets. */ + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS, + .ConstantBufferOffset = 0, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS, + .ConstantBufferOffset = 4, + .ConstantBufferSize = 4); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS, + .ConstantBufferOffset = 8, + .ConstantBufferSize = 4); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM_CHROMAKEY, + .ChromaKeyKillEnable = false); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SBE_SWIZ); + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_AA_LINE_PARAMETERS); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_CLIP, + .ClipEnable = true, + .ViewportXYClipTestEnable = !(extra && extra->disable_viewport), + .MinimumPointWidth = 0.125, + .MaximumPointWidth = 255.875); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_WM, + .StatisticsEnable = true, + .LineEndCapAntialiasingRegionWidth = _05pixels, + .LineAntialiasingRegionWidth = _10pixels, + 
.EarlyDepthStencilControl = NORMAL, + .ForceThreadDispatchEnable = NORMAL, + .PointRasterizationRule = RASTRULE_UPPER_RIGHT, + .BarycentricInterpolationMode = + pipeline->wm_prog_data.barycentric_interp_modes); + + uint32_t samples = 1; + uint32_t log2_samples = __builtin_ffs(samples) - 1; + bool enable_sampling = samples > 1 ? true : false; + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_MULTISAMPLE, + .PixelPositionOffsetEnable = enable_sampling, + .PixelLocation = CENTER, + .NumberofMultisamples = log2_samples); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_SAMPLE_MASK, + .SampleMask = 0xffff); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_VS, + .VSURBStartingAddress = pipeline->urb.vs_start, + .VSURBEntryAllocationSize = pipeline->urb.vs_size - 1, + .VSNumberofURBEntries = pipeline->urb.nr_vs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_GS, + .GSURBStartingAddress = pipeline->urb.gs_start, + .GSURBEntryAllocationSize = pipeline->urb.gs_size - 1, + .GSNumberofURBEntries = pipeline->urb.nr_gs_entries); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_HS, + .HSURBStartingAddress = pipeline->urb.vs_start, + .HSURBEntryAllocationSize = 0, + .HSNumberofURBEntries = 0); + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_URB_DS, + .DSURBStartingAddress = pipeline->urb.vs_start, + .DSURBEntryAllocationSize = 0, + .DSNumberofURBEntries = 0); + + const struct brw_gs_prog_data *gs_prog_data = &pipeline->gs_prog_data; + offset = 1; + length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->gs_vec4 == NO_KERNEL) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, .Enable = false); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_GS, + .SingleProgramFlow = false, + .KernelStartPointer = pipeline->gs_vec4, + .VectorMaskEnable = Vmask, + .SamplerCount = 0, + .BindingTableEntryCount = 0, + .ExpectedVertexCount = pipeline->gs_vertex_count, + + .ScratchSpaceBasePointer = 
pipeline->scratch_start[VK_SHADER_STAGE_GEOMETRY], + .PerThreadScratchSpace = ffs(gs_prog_data->base.base.total_scratch / 2048), + + .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1, + .OutputTopology = gs_prog_data->output_topology, + .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length, + .DispatchGRFStartRegisterForURBData = + gs_prog_data->base.base.dispatch_grf_start_reg, + + .MaximumNumberofThreads = device->info.max_gs_threads, + .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords, + //pipeline->gs_prog_data.dispatch_mode | + .StatisticsEnable = true, + .IncludePrimitiveID = gs_prog_data->include_primitive_id, + .ReorderMode = TRAILING, + .Enable = true, + + .ControlDataFormat = gs_prog_data->control_data_format, + + /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled: + * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v) + * UserClipDistanceCullTestEnableBitmask(v) + */ + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length); + + const struct brw_vue_prog_data *vue_prog_data = &pipeline->vs_prog_data.base; + /* Skip the VUE header and position slots */ + offset = 1; + length = (vue_prog_data->vue_map.num_slots + 1) / 2 - offset; + + if (pipeline->vs_simd8 == NO_KERNEL || (extra && extra->disable_vs)) + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .FunctionEnable = false, + .VertexURBEntryOutputReadOffset = 1, + /* Even if VS is disabled, SBE still gets the amount of + * vertex data to read from this field. We use attribute + * count - 1, as we don't count the VUE header here. 
*/ + .VertexURBEntryOutputLength = + DIV_ROUND_UP(pCreateInfo->pVertexInputState->attributeCount - 1, 2)); + else + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_VS, + .KernelStartPointer = pipeline->vs_simd8, + .SingleVertexDispatch = Multiple, + .VectorMaskEnable = Dmask, + .SamplerCount = 0, + .BindingTableEntryCount = + vue_prog_data->base.binding_table.size_bytes / 4, + .ThreadDispatchPriority = Normal, + .FloatingPointMode = IEEE754, + .IllegalOpcodeExceptionEnable = false, + .AccessesUAV = false, + .SoftwareExceptionEnable = false, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_VERTEX], + .PerThreadScratchSpace = ffs(vue_prog_data->base.total_scratch / 2048), + + .DispatchGRFStartRegisterForURBData = + vue_prog_data->base.dispatch_grf_start_reg, + .VertexURBEntryReadLength = vue_prog_data->urb_read_length, + .VertexURBEntryReadOffset = 0, + + .MaximumNumberofThreads = device->info.max_vs_threads - 1, + .StatisticsEnable = false, + .SIMD8DispatchEnable = true, + .VertexCacheDisable = false, + .FunctionEnable = true, + + .VertexURBEntryOutputReadOffset = offset, + .VertexURBEntryOutputLength = length, + .UserClipDistanceClipTestEnableBitmask = 0, + .UserClipDistanceCullTestEnableBitmask = 0); + + const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data; + uint32_t ksp0, ksp2, grf_start0, grf_start2; + /* Kernel slot 0 takes the SIMD8 program when there is one, otherwise the SIMD16 program; slot 2 is only filled with the SIMD16 program when a SIMD8 program is also present. */ + ksp2 = 0; + grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + ksp0 = pipeline->ps_simd8; + grf_start0 = wm_prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + ksp2 = pipeline->ps_simd16; + grf_start2 = wm_prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + ksp0 = pipeline->ps_simd16; + grf_start0 = wm_prog_data->dispatch_grf_start_reg_16; + } else { + unreachable("no ps shader"); + } + + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS, + .KernelStartPointer0 = ksp0, + + .SingleProgramFlow = false, + .VectorMaskEnable = true, + .SamplerCount = 1, + + .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT], + .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048), + + .MaximumNumberofThreadsPerPSD = 64 - 2, + .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE, + .PushConstantEnable = wm_prog_data->base.nr_params > 0, + ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL, + ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL, + ._32PixelDispatchEnable = false, + + .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0, + .DispatchGRFStartRegisterForConstantSetupData1 = 0, + .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2, + + .KernelStartPointer1 = 0, + .KernelStartPointer2 = ksp2); + + bool per_sample_ps = false; + anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA, + .PixelShaderValid = true, + .PixelShaderKillsPixel = wm_prog_data->uses_kill, + .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode, + .AttributeEnable = wm_prog_data->num_varying_inputs > 0, + .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask, + .PixelShaderIsPerSample = per_sample_ps); + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + /* Tear down a pipeline: calls anv_compiler_free, finishes the batch relocation list and the program stream, returns the blend state to the device's dynamic state pool and finally frees the pipeline object. Always returns VK_SUCCESS. */ +VkResult anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_compiler_free(pipeline); + anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_device_free(pipeline->device, pipeline); + + return VK_SUCCESS; +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result 
1,
+
+                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_FRAGMENT],
+                  .PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
+
+                  .MaximumNumberofThreadsPerPSD = 64 - 2,
+                  .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
+                     POSOFFSET_SAMPLE: POSOFFSET_NONE,
+                  .PushConstantEnable = wm_prog_data->base.nr_params > 0,
+                  ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
+                  ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
+                  ._32PixelDispatchEnable = false,
+
+                  .DispatchGRFStartRegisterForConstantSetupData0 = grf_start0,
+                  .DispatchGRFStartRegisterForConstantSetupData1 = 0,
+                  .DispatchGRFStartRegisterForConstantSetupData2 = grf_start2,
+
+                  .KernelStartPointer1 = 0,
+                  .KernelStartPointer2 = ksp2);
+
+   bool per_sample_ps = false;
+   anv_batch_emit(&pipeline->batch, GEN8_3DSTATE_PS_EXTRA,
+                  .PixelShaderValid = true,
+                  .PixelShaderKillsPixel = wm_prog_data->uses_kill,
+                  .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
+                  .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
+                  .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
+                  .PixelShaderIsPerSample = per_sample_ps);
+
+   *pPipeline = anv_pipeline_to_handle(pipeline);
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Destroys a pipeline object.
+ *
+ * Calls anv_compiler_free() on the pipeline, finishes its batch relocation
+ * list and program state stream, returns pipeline->blend_state to the
+ * device's dynamic state pool and finally frees the pipeline allocation.
+ * Always returns VK_SUCCESS.
+ *
+ * NOTE(review): anv_compute_pipeline_create() never assigns
+ * pipeline->blend_state, yet the compute create path also destroys its
+ * pipelines through this function -- confirm that freeing blend_state is
+ * safe for compute pipelines.
+ */
+VkResult anv_DestroyPipeline(
+    VkDevice                                    _device,
+    VkPipeline                                  _pipeline)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+
+   anv_compiler_free(pipeline);
+   anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device);
+   anv_state_stream_finish(&pipeline->program_stream);
+   anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state);
+   /* Must stay last: everything above still dereferences the pipeline. */
+   anv_device_free(pipeline->device, pipeline);
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Creates `count` graphics pipelines, one per entry of pCreateInfos, and
+ * stores the resulting handles in pPipelines.
+ *
+ * If creating pipeline i fails, pipelines 0..i-1 are destroyed again and
+ * the failing result is returned; otherwise VK_SUCCESS is returned.
+ * pipelineCache is not used by this implementation.
+ */
+VkResult anv_CreateGraphicsPipelines(
+    VkDevice                                    _device,
+    VkPipelineCache                             pipelineCache,
+    uint32_t                                    count,
+    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
+    VkPipeline*                                 pPipelines)
+{
+   VkResult result = VK_SUCCESS;
+
+   unsigned i = 0;
+   for (; i < count; i++) {
+      result
= anv_pipeline_create(_device, &pCreateInfos[i],
+                                   NULL, &pPipelines[i]);
+      if (result != VK_SUCCESS) {
+         for (unsigned j = 0; j < i; j++) {
+            anv_DestroyPipeline(_device, pPipelines[j]);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Creates a single compute pipeline.
+ *
+ * Allocates the anv_pipeline, points its embedded batch at
+ * pipeline->batch_data with pipeline->batch_relocs as relocation list,
+ * installs the compute shader from pCreateInfo->cs.shader as the only
+ * shader stage, runs the compiler and emits MEDIA_VFE_STATE into the
+ * pipeline batch.  Afterwards cs_thread_width_max and cs_right_mask are
+ * derived from the shader's local workgroup size and SIMD size.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY when the pipeline cannot be
+ * allocated, the error from anv_reloc_list_init() when that fails (the
+ * pipeline allocation is released again), and VK_SUCCESS otherwise with
+ * the new handle written to *pPipeline.
+ */
+static VkResult anv_compute_pipeline_create(
+    VkDevice                                    _device,
+    const VkComputePipelineCreateInfo*          pCreateInfo,
+    VkPipeline*                                 pPipeline)
+{
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_pipeline *pipeline;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
+
+   pipeline = anv_device_alloc(device, sizeof(*pipeline), 8,
+                               VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+   if (pipeline == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pipeline->device = device;
+   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
+
+   result = anv_reloc_list_init(&pipeline->batch_relocs, device);
+   if (result != VK_SUCCESS) {
+      anv_device_free(device, pipeline);
+      return result;
+   }
+   /* The batch writes into the fixed-size array embedded in the pipeline. */
+   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
+   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
+   pipeline->batch.relocs = &pipeline->batch_relocs;
+
+   anv_state_stream_init(&pipeline->program_stream,
+                         &device->instruction_block_pool);
+
+   /* Only the compute stage is populated; all other stages stay NULL. */
+   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
+
+   pipeline->shaders[VK_SHADER_STAGE_COMPUTE] =
+      anv_shader_from_handle(pCreateInfo->cs.shader);
+
+   pipeline->use_repclear = false;
+
+   /* NOTE(review): anv_compiler_run() returns int but the result is ignored
+    * here -- confirm whether a compile failure needs to be handled.
+    */
+   anv_compiler_run(device->compiler, pipeline);
+
+   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+
+   /* The scratch base must come from the COMPUTE slot of scratch_start[]:
+    * this pipeline has no fragment stage, and the per-thread size programmed
+    * next to it is taken from the compute program data.
+    */
+   anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE,
+                  .ScratchSpaceBasePointer = pipeline->scratch_start[VK_SHADER_STAGE_COMPUTE],
+                  .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
+                  .ScratchSpaceBasePointerHigh = 0,
+                  .StackSize = 0,
+
+                  .MaximumNumberofThreads = device->info.max_cs_threads - 1,
+                  .NumberofURBEntries = 2,
+                  .ResetGatewayTimer = true,
+ .BypassGatewayControl = true, + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + uint32_t group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + pipeline->cs_thread_width_max = DIV_ROUND_UP(group_size, prog_data->simd_size); + uint32_t remainder = group_size & (prog_data->simd_size - 1); + + if (remainder > 0) + pipeline->cs_right_mask = ~0u >> (32 - remainder); + else + pipeline->cs_right_mask = ~0u >> (32 - prog_data->simd_size); + + + *pPipeline = anv_pipeline_to_handle(pipeline); + + return VK_SUCCESS; +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + +// Pipeline layout functions + +VkResult anv_CreatePipelineLayout( + VkDevice _device, + const VkPipelineLayoutCreateInfo* pCreateInfo, + VkPipelineLayout* pPipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_pipeline_layout *layout; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); + + layout = anv_device_alloc(device, sizeof(*layout), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (layout == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->num_sets = pCreateInfo->descriptorSetCount; + + uint32_t surface_start[VK_SHADER_STAGE_NUM] = { 0, }; + uint32_t sampler_start[VK_SHADER_STAGE_NUM] = { 0, }; + + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + layout->stage[s].surface_count = 0; + layout->stage[s].sampler_count = 0; + } + + for (uint32_t i 
= 0; i < pCreateInfo->descriptorSetCount; i++) { + ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, + pCreateInfo->pSetLayouts[i]); + + layout->set[i].layout = set_layout; + for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) { + layout->set[i].surface_start[s] = surface_start[s]; + surface_start[s] += set_layout->stage[s].surface_count; + layout->set[i].sampler_start[s] = sampler_start[s]; + sampler_start[s] += set_layout->stage[s].sampler_count; + + layout->stage[s].surface_count += set_layout->stage[s].surface_count; + layout->stage[s].sampler_count += set_layout->stage[s].sampler_count; + } + } + + *pPipelineLayout = anv_pipeline_layout_to_handle(layout); + + return VK_SUCCESS; +} + +VkResult anv_DestroyPipelineLayout( + VkDevice _device, + VkPipelineLayout _pipelineLayout) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout); + + anv_device_free(device, pipeline_layout); + + return VK_SUCCESS; +} diff --git a/src/vulkan/anv_private.h b/src/vulkan/anv_private.h new file mode 100644 index 00000000000..d53f63d5d27 --- /dev/null +++ b/src/vulkan/anv_private.h @@ -0,0 +1,1143 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <pthread.h> +#include <assert.h> +#include <i915_drm.h> + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#else +#define VG(x) +#endif + +#include "brw_device_info.h" +#include "util/macros.h" +#include "util/list.h" + +#define VK_PROTOTYPES +#include <vulkan/vulkan.h> +#include <vulkan/vulkan_intel.h> +#include <vulkan/vk_wsi_lunarg.h> + +#include "anv_entrypoints.h" + +#include "brw_context.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define anv_noreturn __attribute__((__noreturn__)) +#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +static inline int32_t +align_i32(int32_t v, int32_t a) +{ + return (v + a - 1) & ~(a - 1); +} + +/** Alignment must be a power of 2. 
*/ +static inline bool +anv_is_aligned(uintmax_t n, uintmax_t a) +{ + assert(a == (a & -a)); + return (n & (a - 1)) == 0; +} + +static inline uint32_t +anv_minify(uint32_t n, uint32_t levels) +{ + if (unlikely(n == 0)) + return 0; + else + return MAX(n >> levels, 1); +} + +static inline bool +anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask) +{ + if (*inout_mask & clear_mask) { + *inout_mask &= ~clear_mask; + return true; + } else { + return false; + } +} + +#define for_each_bit(b, dword) \ + for (uint32_t __dword = (dword); \ + (b) = __builtin_ffs(__dword) - 1, __dword; \ + __dword &= ~(1 << (b))) + +/* Define no kernel as 1, since that's an illegal offset for a kernel */ +#define NO_KERNEL 1 + +struct anv_common { + VkStructureType sType; + const void* pNext; +}; + +/* Whenever we generate an error, pass it through this function. Useful for + * debugging, where we can break on it. Only call at error site, not when + * propagating errors. Might be useful to plug in a stack trace here. + */ + +static inline VkResult +vk_error(VkResult error) +{ +#ifdef DEBUG + fprintf(stderr, "vk_error: %x\n", error); +#endif + + return error; +} + +void __anv_finishme(const char *file, int line, const char *format, ...) + anv_printflike(3, 4); +void anv_loge(const char *format, ...) anv_printflike(1, 2); +void anv_loge_v(const char *format, va_list va); + +/** + * Print a FINISHME message, including its source location. + */ +#define anv_finishme(format, ...) \ + __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); + +/* A non-fatal assert. Useful for debugging. */ +#ifdef DEBUG +#define anv_assert(x) ({ \ + if (unlikely(!(x))) \ + fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \ +}) +#else +#define anv_assert(x) +#endif + +void anv_abortf(const char *format, ...) 
anv_noreturn anv_printflike(1, 2); +void anv_abortfv(const char *format, va_list va) anv_noreturn; + +#define stub_return(v) \ + do { \ + anv_finishme("stub %s", __func__); \ + return (v); \ + } while (0) + +#define stub() \ + do { \ + anv_finishme("stub %s", __func__); \ + return; \ + } while (0) + +/** + * A dynamically growable, circular buffer. Elements are added at head and + * removed from tail. head and tail are free-running uint32_t indices and we + * only compute the modulo with size when accessing the array. This way, + * number of bytes in the queue is always head - tail, even in case of + * wraparound. + */ + +struct anv_vector { + uint32_t head; + uint32_t tail; + uint32_t element_size; + uint32_t size; + void *data; +}; + +int anv_vector_init(struct anv_vector *queue, uint32_t element_size, uint32_t size); +void *anv_vector_add(struct anv_vector *queue); +void *anv_vector_remove(struct anv_vector *queue); + +static inline int +anv_vector_length(struct anv_vector *queue) +{ + return (queue->head - queue->tail) / queue->element_size; +} + +static inline void +anv_vector_finish(struct anv_vector *queue) +{ + free(queue->data); +} + +#define anv_vector_foreach(elem, queue) \ + static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \ + for (uint32_t __anv_vector_offset = (queue)->tail; \ + elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \ + __anv_vector_offset += (queue)->element_size) + +struct anv_bo { + int gem_handle; + uint32_t index; + uint64_t offset; + uint64_t size; + + /* This field is here for the benefit of the aub dumper. It can (and for + * userptr bos it must) be set to the cpu map of the buffer. Destroying + * the bo won't clean up the mmap, it's still the responsibility of the bo + * user to do that. */ + void *map; +}; + +/* Represents a lock-free linked list of "free" things. This is used by + * both the block pool and the state pools. 
Unfortunately, in order to + * solve the ABA problem, we can't use a single uint32_t head. + */ +union anv_free_list { + struct { + uint32_t offset; + + /* A simple count that is incremented every time the head changes. */ + uint32_t count; + }; + uint64_t u64; +}; + +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) + +struct anv_block_state { + union { + struct { + uint32_t next; + uint32_t end; + }; + uint64_t u64; + }; +}; + +struct anv_block_pool { + struct anv_device *device; + + struct anv_bo bo; + void *map; + int fd; + + /** + * Array of mmaps and gem handles owned by the block pool, reclaimed when + * the block pool is destroyed. + */ + struct anv_vector mmap_cleanups; + + uint32_t block_size; + + union anv_free_list free_list; + struct anv_block_state state; +}; + +static inline uint32_t +anv_block_pool_size(struct anv_block_pool *pool) +{ + return pool->state.end; +} + +struct anv_state { + uint32_t offset; + uint32_t alloc_size; + void *map; +}; + +struct anv_fixed_size_state_pool { + size_t state_size; + union anv_free_list free_list; + struct anv_block_state block; +}; + +#define ANV_MIN_STATE_SIZE_LOG2 6 +#define ANV_MAX_STATE_SIZE_LOG2 10 + +#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2) + +struct anv_state_pool { + struct anv_block_pool *block_pool; + struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS]; +}; + +struct anv_state_stream { + struct anv_block_pool *block_pool; + uint32_t next; + uint32_t current_block; + uint32_t end; +}; + +void anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_block_pool_finish(struct anv_block_pool *pool); +uint32_t anv_block_pool_alloc(struct anv_block_pool *pool); +void anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset); +void anv_state_pool_init(struct anv_state_pool *pool, + struct anv_block_pool *block_pool); +void anv_state_pool_finish(struct anv_state_pool *pool); +struct anv_state 
anv_state_pool_alloc(struct anv_state_pool *pool, + size_t state_size, size_t alignment); +void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state); +void anv_state_stream_init(struct anv_state_stream *stream, + struct anv_block_pool *block_pool); +void anv_state_stream_finish(struct anv_state_stream *stream); +struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, + uint32_t size, uint32_t alignment); + +/** + * Implements a pool of re-usable BOs. The interface is identical to that + * of block_pool except that each block is its own BO. + */ +struct anv_bo_pool { + struct anv_device *device; + + uint32_t bo_size; + + void *free_list; +}; + +void anv_bo_pool_init(struct anv_bo_pool *pool, + struct anv_device *device, uint32_t block_size); +void anv_bo_pool_finish(struct anv_bo_pool *pool); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo); +void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); + +struct anv_physical_device { + struct anv_instance * instance; + uint32_t chipset_id; + bool no_hw; + const char * path; + const char * name; + const struct brw_device_info * info; + uint64_t aperture_size; +}; + +struct anv_instance { + void * pAllocUserData; + PFN_vkAllocFunction pfnAlloc; + PFN_vkFreeFunction pfnFree; + uint32_t apiVersion; + uint32_t physicalDeviceCount; + struct anv_physical_device physicalDevice; +}; + +struct anv_meta_state { + struct { + VkPipeline pipeline; + } clear; + + struct { + VkPipeline pipeline; + VkPipelineLayout pipeline_layout; + VkDescriptorSetLayout ds_layout; + } blit; + + struct { + VkDynamicRasterState rs_state; + VkDynamicColorBlendState cb_state; + VkDynamicDepthStencilState ds_state; + } shared; +}; + +struct anv_queue { + struct anv_device * device; + + struct anv_state_pool * pool; + + /** + * Serial number of the most recently completed batch executed on the + * engine. 
+ */ + struct anv_state completed_serial; + + /** + * The next batch submitted to the engine will be assigned this serial + * number. + */ + uint32_t next_serial; + + uint32_t last_collected_serial; +}; + +struct anv_device { + struct anv_instance * instance; + uint32_t chipset_id; + struct brw_device_info info; + int context_id; + int fd; + bool no_hw; + bool dump_aub; + + struct anv_bo_pool batch_bo_pool; + + struct anv_block_pool dynamic_state_block_pool; + struct anv_state_pool dynamic_state_pool; + + struct anv_block_pool instruction_block_pool; + struct anv_block_pool surface_state_block_pool; + struct anv_state_pool surface_state_pool; + + struct anv_meta_state meta_state; + + struct anv_state border_colors; + + struct anv_queue queue; + + struct anv_block_pool scratch_block_pool; + + struct anv_compiler * compiler; + struct anv_aub_writer * aub_writer; + pthread_mutex_t mutex; +}; + +void * +anv_device_alloc(struct anv_device * device, + size_t size, + size_t alignment, + VkSystemAllocType allocType); + +void +anv_device_free(struct anv_device * device, + void * mem); + +void* anv_gem_mmap(struct anv_device *device, + uint32_t gem_handle, uint64_t offset, uint64_t size); +void anv_gem_munmap(void *p, uint64_t size); +uint32_t anv_gem_create(struct anv_device *device, size_t size); +void anv_gem_close(struct anv_device *device, int gem_handle); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); +int anv_gem_wait(struct anv_device *device, int gem_handle, int64_t *timeout_ns); +int anv_gem_execbuffer(struct anv_device *device, + struct drm_i915_gem_execbuffer2 *execbuf); +int anv_gem_set_tiling(struct anv_device *device, int gem_handle, + uint32_t stride, uint32_t tiling); +int anv_gem_create_context(struct anv_device *device); +int anv_gem_destroy_context(struct anv_device *device, int context); +int anv_gem_get_param(int fd, uint32_t param); +int anv_gem_get_aperture(int fd, uint64_t *size); +int anv_gem_handle_to_fd(struct anv_device 
*device, int gem_handle); +int anv_gem_fd_to_handle(struct anv_device *device, int fd); +int anv_gem_userptr(struct anv_device *device, void *mem, size_t size); + +VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size); + +struct anv_reloc_list { + size_t num_relocs; + size_t array_length; + struct drm_i915_gem_relocation_entry * relocs; + struct anv_bo ** reloc_bos; +}; + +VkResult anv_reloc_list_init(struct anv_reloc_list *list, + struct anv_device *device); +void anv_reloc_list_finish(struct anv_reloc_list *list, + struct anv_device *device); + +uint64_t anv_reloc_list_add(struct anv_reloc_list *list, + struct anv_device *device, + uint32_t offset, struct anv_bo *target_bo, + uint32_t delta); + +struct anv_batch_bo { + /* Link in the anv_cmd_buffer.owned_batch_bos list */ + struct list_head link; + + struct anv_bo bo; + + /* Bytes actually consumed in this batch BO */ + size_t length; + + struct anv_reloc_list relocs; +}; + +struct anv_batch { + struct anv_device * device; + + void * start; + void * end; + void * next; + + struct anv_reloc_list * relocs; + + /* This callback is called (with the associated user data) in the event + * that the batch runs out of space. 
+ */ + VkResult (*extend_cb)(struct anv_batch *, void *); + void * user_data; +}; + +void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords); +void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other); +uint64_t anv_batch_emit_reloc(struct anv_batch *batch, + void *location, struct anv_bo *bo, uint32_t offset); + +struct anv_address { + struct anv_bo *bo; + uint32_t offset; +}; + +#define __gen_address_type struct anv_address +#define __gen_user_data struct anv_batch + +static inline uint64_t +__gen_combine_address(struct anv_batch *batch, void *location, + const struct anv_address address, uint32_t delta) +{ + if (address.bo == NULL) { + return delta; + } else { + assert(batch->start <= location && location < batch->end); + + return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta); + } +} + +#include "gen7_pack.h" +#include "gen75_pack.h" +#undef GEN8_3DSTATE_MULTISAMPLE +#include "gen8_pack.h" + +#define anv_batch_emit(batch, cmd, ...) do { \ + void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \ + struct cmd __template = { \ + cmd ## _header, \ + __VA_ARGS__ \ + }; \ + cmd ## _pack(batch, __dst, &__template); \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(__dst, cmd ## _length * 4)); \ + } while (0) + +#define anv_batch_emitn(batch, n, cmd, ...) 
({ \ + void *__dst = anv_batch_emit_dwords(batch, n); \ + struct cmd __template = { \ + cmd ## _header, \ + .DwordLength = n - cmd ## _length_bias, \ + __VA_ARGS__ \ + }; \ + cmd ## _pack(batch, __dst, &__template); \ + __dst; \ + }) + +#define anv_batch_emit_merge(batch, dwords0, dwords1) \ + do { \ + uint32_t *dw; \ + \ + assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \ + dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \ + for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \ + dw[i] = (dwords0)[i] | (dwords1)[i]; \ + VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\ + } while (0) + +#define GEN8_MOCS { \ + .MemoryTypeLLCeLLCCacheabilityControl = WB, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + +struct anv_device_memory { + struct anv_bo bo; + VkDeviceSize map_size; + void * map; +}; + +struct anv_dynamic_vp_state { + struct anv_state sf_clip_vp; + struct anv_state cc_vp; + struct anv_state scissor; +}; + +struct anv_dynamic_rs_state { + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; +}; + +struct anv_dynamic_ds_state { + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; +}; + +struct anv_dynamic_cb_state { + uint32_t state_color_calc[GEN8_COLOR_CALC_STATE_length]; + +}; + +struct anv_descriptor_slot { + int8_t dynamic_slot; + uint8_t index; +}; + +struct anv_descriptor_set_layout { + struct { + uint32_t surface_count; + struct anv_descriptor_slot *surface_start; + uint32_t sampler_count; + struct anv_descriptor_slot *sampler_start; + } stage[VK_SHADER_STAGE_NUM]; + + uint32_t count; + uint32_t num_dynamic_buffers; + uint32_t shader_stages; + struct anv_descriptor_slot entries[0]; +}; + +struct anv_descriptor { + struct anv_sampler *sampler; + struct anv_surface_view *view; +}; + +struct anv_descriptor_set { + struct anv_descriptor descriptors[0]; +}; + +VkResult 
+anv_descriptor_set_create(struct anv_device *device, + const struct anv_descriptor_set_layout *layout, + struct anv_descriptor_set **out_set); + +void +anv_descriptor_set_destroy(struct anv_device *device, + struct anv_descriptor_set *set); + +#define MAX_VBS 32 +#define MAX_SETS 8 +#define MAX_RTS 8 + +struct anv_pipeline_layout { + struct { + struct anv_descriptor_set_layout *layout; + uint32_t surface_start[VK_SHADER_STAGE_NUM]; + uint32_t sampler_start[VK_SHADER_STAGE_NUM]; + } set[MAX_SETS]; + + uint32_t num_sets; + + struct { + uint32_t surface_count; + uint32_t sampler_count; + } stage[VK_SHADER_STAGE_NUM]; +}; + +struct anv_buffer { + struct anv_device * device; + VkDeviceSize size; + + /* Set when bound */ + struct anv_bo * bo; + VkDeviceSize offset; +}; + +#define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0) +#define ANV_CMD_BUFFER_RS_DIRTY (1 << 2) +#define ANV_CMD_BUFFER_DS_DIRTY (1 << 3) +#define ANV_CMD_BUFFER_CB_DIRTY (1 << 4) +#define ANV_CMD_BUFFER_VP_DIRTY (1 << 5) +#define ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY (1 << 6) + +struct anv_vertex_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; +}; + +struct anv_descriptor_set_binding { + struct anv_descriptor_set * set; + uint32_t dynamic_offsets[128]; +}; + +/** State required while building cmd buffer */ +struct anv_cmd_state { + uint32_t current_pipeline; + uint32_t vb_dirty; + uint32_t dirty; + uint32_t compute_dirty; + uint32_t descriptors_dirty; + uint32_t scratch_size; + struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; + struct anv_framebuffer * framebuffer; + struct anv_render_pass * pass; + struct anv_subpass * subpass; + struct anv_dynamic_rs_state * rs_state; + struct anv_dynamic_ds_state * ds_state; + struct anv_dynamic_vp_state * vp_state; + struct anv_dynamic_cb_state * cb_state; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; + struct anv_vertex_binding vertex_bindings[MAX_VBS]; + struct anv_descriptor_set_binding descriptors[MAX_SETS]; +}; + +struct 
anv_cmd_pool { + struct list_head cmd_buffers; +}; + +#define ANV_CMD_BUFFER_BATCH_SIZE 8192 + +enum anv_cmd_buffer_exec_mode { + ANV_CMD_BUFFER_EXEC_MODE_PRIMARY, + ANV_CMD_BUFFER_EXEC_MODE_EMIT, + ANV_CMD_BUFFER_EXEC_MODE_CHAIN, + ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN, +}; + +struct anv_cmd_buffer { + struct anv_device * device; + + struct list_head pool_link; + + struct anv_batch batch; + + /* Fields required for the actual chain of anv_batch_bo's. + * + * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain(). + */ + struct list_head batch_bos; + struct list_head surface_bos; + uint32_t surface_next; + enum anv_cmd_buffer_exec_mode exec_mode; + + /* A vector of anv_batch_bo pointers for every batch or surface buffer + * referenced by this command buffer + * + * initialized by anv_cmd_buffer_init_batch_bo_chain() + */ + struct anv_vector seen_bbos; + + /* Information needed for execbuf + * + * These fields are generated by anv_cmd_buffer_prepare_execbuf(). + */ + struct { + struct drm_i915_gem_execbuffer2 execbuf; + + struct drm_i915_gem_exec_object2 * objects; + uint32_t bo_count; + struct anv_bo ** bos; + + /* Allocated length of the 'objects' and 'bos' arrays */ + uint32_t array_length; + + bool need_reloc; + } execbuf2; + + /* Serial for tracking buffer completion */ + uint32_t serial; + + /* Stream objects for storing temporary data */ + struct anv_state_stream surface_state_stream; + struct anv_state_stream dynamic_state_stream; + + VkCmdBufferOptimizeFlags opt_flags; + VkCmdBufferLevel level; + + struct anv_cmd_state state; +}; + +VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, + struct anv_cmd_buffer *secondary); 
+void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer); + +struct anv_bo * +anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer); +struct anv_reloc_list * +anv_cmd_buffer_current_surface_relocs(struct anv_cmd_buffer *cmd_buffer); +struct anv_state +anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); +struct anv_state +anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, + uint32_t size, uint32_t alignment); + +VkResult anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer); + +void anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, + struct anv_subpass *subpass); + +void anv_cmd_buffer_clear_attachments(struct anv_cmd_buffer *cmd_buffer, + struct anv_render_pass *pass, + const VkClearValue *clear_values); + +void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_aub_writer_destroy(struct anv_aub_writer *writer); + +struct anv_fence { + struct anv_bo bo; + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec2_objects[1]; + bool ready; +}; + +struct anv_shader_module { + uint32_t size; + char data[0]; +}; + +struct anv_shader { + struct anv_shader_module * module; + char entrypoint[0]; +}; + +struct anv_pipeline { + struct anv_device * device; + struct anv_batch batch; + uint32_t batch_data[256]; + struct anv_reloc_list batch_relocs; + struct anv_shader * shaders[VK_SHADER_STAGE_NUM]; + struct anv_pipeline_layout * layout; + bool use_repclear; + + struct brw_vs_prog_data vs_prog_data; + struct brw_wm_prog_data wm_prog_data; + struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; + bool writes_point_size; + struct brw_stage_prog_data * prog_data[VK_SHADER_STAGE_NUM]; + uint32_t scratch_start[VK_SHADER_STAGE_NUM]; + uint32_t total_scratch; + struct { + uint32_t vs_start; + uint32_t 
vs_size; + uint32_t nr_vs_entries; + uint32_t gs_start; + uint32_t gs_size; + uint32_t nr_gs_entries; + } urb; + + uint32_t active_stages; + struct anv_state_stream program_stream; + struct anv_state blend_state; + uint32_t vs_simd8; + uint32_t ps_simd8; + uint32_t ps_simd16; + uint32_t gs_vec4; + uint32_t gs_vertex_count; + uint32_t cs_simd; + + uint32_t vb_used; + uint32_t binding_stride[MAX_VBS]; + + uint32_t state_sf[GEN8_3DSTATE_SF_length]; + uint32_t state_vf[GEN8_3DSTATE_VF_length]; + uint32_t state_raster[GEN8_3DSTATE_RASTER_length]; + uint32_t state_wm_depth_stencil[GEN8_3DSTATE_WM_DEPTH_STENCIL_length]; + + uint32_t cs_thread_width_max; + uint32_t cs_right_mask; +}; + +struct anv_pipeline_create_info { + bool use_repclear; + bool disable_viewport; + bool disable_scissor; + bool disable_vs; + bool use_rectlist; +}; + +VkResult +anv_pipeline_create(VkDevice device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_pipeline_create_info *extra, + VkPipeline *pPipeline); + +struct anv_compiler *anv_compiler_create(struct anv_device *device); +void anv_compiler_destroy(struct anv_compiler *compiler); +int anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline); +void anv_compiler_free(struct anv_pipeline *pipeline); + +struct anv_format { + const char *name; + uint16_t surface_format; /**< RENDER_SURFACE_STATE.SurfaceFormat */ + uint8_t cpp; /**< Bytes-per-pixel of anv_format::surface_format. */ + uint8_t num_channels; + uint16_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + bool has_stencil; +}; + +const struct anv_format * +anv_format_for_vk_format(VkFormat format); +bool anv_is_vk_format_depth_or_stencil(VkFormat format); + +/** + * A proxy for the color surfaces, depth surfaces, and stencil surfaces. + */ +struct anv_surface { + /** + * Offset from VkImage's base address, as bound by vkBindImageMemory(). 
+ */ + uint32_t offset; + + uint32_t stride; /**< RENDER_SURFACE_STATE.SurfacePitch */ + uint16_t qpitch; /**< RENDER_SURFACE_STATE.QPitch */ + + /** + * \name Alignment of miptree images, in units of pixels. + * + * These fields contain the real alignment values, not the values to be + * given to the GPU. For example, if h_align is 4, then program the GPU + * with HALIGN_4. + * \{ + */ + uint8_t h_align; /**< RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ + uint8_t v_align; /**< RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ + /** \} */ + + uint8_t tile_mode; /**< RENDER_SURFACE_STATE.TileMode */ +}; + +struct anv_image { + VkImageType type; + VkExtent3D extent; + VkFormat format; + uint32_t levels; + uint32_t array_size; + + VkDeviceSize size; + uint32_t alignment; + + /* Set when bound */ + struct anv_bo *bo; + VkDeviceSize offset; + + struct anv_swap_chain *swap_chain; + + /** RENDER_SURFACE_STATE.SurfaceType */ + uint8_t surf_type; + + /** Primary surface is either color or depth. */ + struct anv_surface primary_surface; + + /** Stencil surface is optional. 
*/ + struct anv_surface stencil_surface; +}; + +/** + * State shared by buffer views, image views and color attachment views, + * each of which embeds this struct as its `view` member. + */ +struct anv_surface_view { + struct anv_state surface_state; /**< RENDER_SURFACE_STATE */ + struct anv_bo *bo; + uint32_t offset; /**< VkBufferCreateInfo::offset */ + uint32_t range; /**< VkBufferCreateInfo::range */ + VkFormat format; /**< VkBufferCreateInfo::format */ +}; + +struct anv_buffer_view { + struct anv_surface_view view; +}; + +struct anv_image_view { + struct anv_surface_view view; + VkExtent3D extent; +}; + +enum anv_attachment_view_type { + ANV_ATTACHMENT_VIEW_TYPE_COLOR, + ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL, +}; + +/** + * Common header of attachment views: embedded as the first member (`base`) + * of struct anv_color_attachment_view and struct anv_depth_stencil_view. + * NOTE(review): attachment_type presumably identifies which of the two + * structs contains this header -- confirm against the users. + */ +struct anv_attachment_view { + enum anv_attachment_view_type attachment_type; + VkExtent3D extent; +}; + +struct anv_color_attachment_view { + struct anv_attachment_view base; + struct anv_surface_view view; +}; + +struct anv_depth_stencil_view { + struct anv_attachment_view base; + + struct anv_bo *bo; + + uint32_t depth_offset; /**< Offset into bo. */ + uint32_t depth_stride; /**< 3DSTATE_DEPTH_BUFFER.SurfacePitch */ + uint32_t depth_format; /**< 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ + uint16_t depth_qpitch; /**< 3DSTATE_DEPTH_BUFFER.SurfaceQPitch */ + + uint32_t stencil_offset; /**< Offset into bo. 
*/ + uint32_t stencil_stride; /**< 3DSTATE_STENCIL_BUFFER.SurfacePitch */ + uint16_t stencil_qpitch; /**< 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ +}; + +/** + * Arguments for anv_image_create(): the application's VkImageCreateInfo plus + * an optional tile-mode override (anv_CreateSwapChainWSI() sets + * force_tile_mode together with tile_mode). + */ +struct anv_image_create_info { + const VkImageCreateInfo *vk_info; + bool force_tile_mode; + uint8_t tile_mode; +}; + +VkResult anv_image_create(VkDevice _device, + const struct anv_image_create_info *info, + VkImage *pImage); + +void anv_image_view_init(struct anv_image_view *view, + struct anv_device *device, + const VkImageViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); + +void anv_color_attachment_view_init(struct anv_color_attachment_view *view, + struct anv_device *device, + const VkAttachmentViewCreateInfo* pCreateInfo, + struct anv_cmd_buffer *cmd_buffer); +void anv_fill_buffer_surface_state(void *state, VkFormat format, + uint32_t offset, uint32_t range); + +void anv_surface_view_fini(struct anv_device *device, + struct anv_surface_view *view); + +struct anv_sampler { + uint32_t state[4]; +}; + +struct anv_framebuffer { + uint32_t width; + uint32_t height; + uint32_t layers; + + /* Viewport for clears */ + VkDynamicViewportState vp_state; + + uint32_t attachment_count; + /* Zero-length trailing array of attachment pointers. + * NOTE(review): presumably attachment_count entries are allocated + * together with the struct -- confirm at the allocation site. */ + const struct anv_attachment_view * attachments[0]; +}; + +struct anv_subpass { + uint32_t input_count; + uint32_t * input_attachments; + uint32_t color_count; + uint32_t * color_attachments; + uint32_t * resolve_attachments; + uint32_t depth_stencil_attachment; +}; + +struct anv_render_pass_attachment { + VkFormat format; + uint32_t samples; + VkAttachmentLoadOp load_op; + VkAttachmentLoadOp stencil_load_op; +}; + +struct anv_render_pass { + uint32_t attachment_count; + uint32_t subpass_count; + + struct anv_render_pass_attachment * attachments; + /* Zero-length trailing array. + * NOTE(review): presumably subpass_count entries follow the struct -- + * confirm at the allocation site. */ + struct anv_subpass subpasses[0]; +}; + +void anv_device_init_meta(struct anv_device *device); +void anv_device_finish_meta(struct anv_device *device); + +void *anv_lookup_entrypoint(const char *name); + +#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static 
inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) _obj; \ + } + +/* Same pair of casts for handle types that are structs carrying the object + * pointer in a 64-bit `.handle` member. */ +#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType) \ + \ + static inline struct __anv_type * \ + __anv_type ## _from_handle(__VkType _handle) \ + { \ + return (struct __anv_type *) _handle.handle; \ + } \ + \ + static inline __VkType \ + __anv_type ## _to_handle(struct __anv_type *_obj) \ + { \ + return (__VkType) { .handle = (uint64_t) _obj }; \ + } + +/* Declares a local `struct __anv_type *__name` initialized from the API + * handle `__handle` via the matching _from_handle() cast. */ +#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \ + struct __anv_type *__name = __anv_type ## _from_handle(__handle) + +/* The cast macros expand to complete function definitions, so the + * invocations below take no trailing semicolon (a stray one is an empty + * file-scope declaration). */ +ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCmdBuffer) +ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice) +ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance) +ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice) +ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue) +ANV_DEFINE_HANDLE_CASTS(anv_swap_chain, VkSwapChainWSI) + +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCmdPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_attachment_view, VkAttachmentView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_cb_state, VkDynamicColorBlendState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_ds_state, VkDynamicDepthStencilState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_rs_state, VkDynamicRasterState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_dynamic_vp_state, VkDynamicViewportState) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer) 
+ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader, VkShader) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule) + +/* Defines <__anv_type>_to_<__VkType>(): a const pointer cast from + * `struct __anv_type` to `__VkType`. */ +#define ANV_DEFINE_STRUCT_CASTS(__anv_type, __VkType) \ + \ + static inline const __VkType * \ + __anv_type ## _to_ ## __VkType(const struct __anv_type *__anv_obj) \ + { \ + return (const __VkType *) __anv_obj; \ + } + +/* Declares a local `const __VkType *__vk_name` obtained from + * `__common_name` through the anv_common_to_<__VkType>() cast. */ +#define ANV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name) \ + const __VkType *__vk_name = anv_common_to_ ## __VkType(__common_name) + +ANV_DEFINE_STRUCT_CASTS(anv_common, VkMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkBufferMemoryBarrier) +ANV_DEFINE_STRUCT_CASTS(anv_common, VkImageMemoryBarrier) + +#ifdef __cplusplus +} +#endif diff --git a/src/vulkan/anv_query.c b/src/vulkan/anv_query.c new file mode 100644 index 00000000000..d7903ee2cb8 --- /dev/null +++ b/src/vulkan/anv_query.c @@ -0,0 +1,352 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "anv_private.h" + +struct anv_query_pool_slot { + uint64_t begin; + uint64_t end; + uint64_t available; +}; + +struct anv_query_pool { + VkQueryType type; + uint32_t slots; + struct anv_bo bo; +}; + +VkResult anv_CreateQueryPool( + VkDevice _device, + const VkQueryPoolCreateInfo* pCreateInfo, + VkQueryPool* pQueryPool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_query_pool *pool; + VkResult result; + size_t size; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO); + + switch (pCreateInfo->queryType) { + case VK_QUERY_TYPE_OCCLUSION: + break; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return VK_UNSUPPORTED; + default: + unreachable(""); + } + + pool = anv_device_alloc(device, sizeof(*pool), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pool == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + size = pCreateInfo->slots * sizeof(struct anv_query_pool_slot); + result = anv_bo_init_new(&pool->bo, device, size); + if (result != VK_SUCCESS) + goto fail; + + pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size); + + *pQueryPool = anv_query_pool_to_handle(pool); + + return VK_SUCCESS; + + fail: + anv_device_free(device, pool); + + return result; +} + +VkResult anv_DestroyQueryPool( + VkDevice _device, + VkQueryPool _pool) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, 
pool, _pool); + + anv_gem_munmap(pool->bo.map, pool->bo.size); + anv_gem_close(device, pool->bo.gem_handle); + anv_device_free(device, pool); + + return VK_SUCCESS; +} + +/** + * Read occlusion results for queries [startQuery, startQuery + queryCount) + * from the pool's CPU mapping. + * + * *pDataSize is always set to the size of the result array (64-bit or + * 32-bit entries depending on VK_QUERY_RESULT_64_BIT). When pData is NULL + * only the size is reported. With VK_QUERY_RESULT_WAIT_BIT the pool's buffer + * object is waited on first. VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is not + * implemented and returns VK_UNSUPPORTED. + */ +VkResult anv_GetQueryPoolResults( + VkDevice _device, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + size_t* pDataSize, + void* pData, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + struct anv_query_pool_slot *slot = pool->bo.map; + int64_t timeout = INT64_MAX; + uint32_t *dst32 = pData; + uint64_t *dst64 = pData; + uint64_t result; + int ret; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availability info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return VK_UNSUPPORTED; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + if (flags & VK_QUERY_RESULT_64_BIT) + *pDataSize = queryCount * sizeof(uint64_t); + else + *pDataSize = queryCount * sizeof(uint32_t); + + if (pData == NULL) + return VK_SUCCESS; + + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout); + if (ret == -1) + return vk_error(VK_ERROR_UNKNOWN); + } + + /* Each result is end - begin of its slot; 32-bit results are clamped to + * UINT32_MAX. */ + for (uint32_t i = 0; i < queryCount; i++) { + result = slot[startQuery + i].end - slot[startQuery + i].begin; + if (flags & VK_QUERY_RESULT_64_BIT) { + *dst64++ = result; + } else { + if (result > UINT32_MAX) + result = UINT32_MAX; + *dst32++ = result; + } + } + + return VK_SUCCESS; +} + +/* Emit a PIPE_CONTROL whose post-sync operation writes the PS depth count + * to `offset` within `bo`. */ +static void +anv_batch_emit_ps_depth_count(struct anv_batch *batch, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WritePSDepthCount, + .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */ +} + +void anv_CmdBeginQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot, + VkQueryControlFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, 
cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + /* Offset of the slot's `begin` field (first member of the slot). */ + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot)); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +void anv_CmdEndQuery( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t slot) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + /* + 8 selects the slot's `end` field, which follows the 8-byte + * `begin` field. */ + anv_batch_emit_ps_depth_count(&cmd_buffer->batch, &pool->bo, + slot * sizeof(struct anv_query_pool_slot) + 8); + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + default: + unreachable(""); + } +} + +/* Not implemented yet: only calls stub(). */ +void anv_CmdResetQueryPool( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount) +{ + stub(); +} + +/* Register address stored to memory below as two 32-bit halves (TIMESTAMP + * and TIMESTAMP + 4). */ +#define TIMESTAMP 0x2358 + +/** + * Write a timestamp to destBuffer at destOffset. + * + * VK_TIMESTAMP_TYPE_TOP stores the TIMESTAMP register with two + * MI_STORE_REGISTER_MEM commands; VK_TIMESTAMP_TYPE_BOTTOM uses a + * PIPE_CONTROL with a WriteTimestamp post-sync operation. Any other type + * emits nothing. + */ +void anv_CmdWriteTimestamp( + VkCmdBuffer cmdBuffer, + VkTimestampType timestampType, + VkBuffer destBuffer, + VkDeviceSize destOffset) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + struct anv_bo *bo = buffer->bo; + + switch (timestampType) { + case VK_TIMESTAMP_TYPE_TOP: + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP, + .MemoryAddress = { bo, buffer->offset + destOffset }); + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = TIMESTAMP + 4, + .MemoryAddress = { bo, buffer->offset + destOffset + 4 }); + break; + + case VK_TIMESTAMP_TYPE_BOTTOM: + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .DestinationAddressType = DAT_PPGTT, + .PostSyncOperation = WriteTimestamp, + .Address = /* FIXME: This is only lower 32 bits */ + { bo, buffer->offset + destOffset }); + break; + + default: + break; + } +} + +#define alu_opcode(v) __gen_field((v), 20, 31) +#define 
alu_operand1(v) __gen_field((v), 10, 19) +#define alu_operand2(v) __gen_field((v), 0, 9) +#define alu(opcode, operand1, operand2) \ + alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2) + +/* Opcode values passed to alu() (placed in bits 20..31 by alu_opcode()). */ +#define OPCODE_NOOP 0x000 +#define OPCODE_LOAD 0x080 +#define OPCODE_LOADINV 0x480 +#define OPCODE_LOAD0 0x081 +#define OPCODE_LOAD1 0x481 +#define OPCODE_ADD 0x100 +#define OPCODE_SUB 0x101 +#define OPCODE_AND 0x102 +#define OPCODE_OR 0x103 +#define OPCODE_XOR 0x104 +#define OPCODE_STORE 0x180 +#define OPCODE_STOREINV 0x580 + +/* Operand values passed to alu() as operand1 / operand2. */ +#define OPERAND_R0 0x00 +#define OPERAND_R1 0x01 +#define OPERAND_R2 0x02 +#define OPERAND_R3 0x03 +#define OPERAND_R4 0x04 +#define OPERAND_SRCA 0x20 +#define OPERAND_SRCB 0x21 +#define OPERAND_ACCU 0x31 +#define OPERAND_ZF 0x32 +#define OPERAND_CF 0x33 + +/* Register address of 64-bit command-streamer register n: 8 bytes apart, + * starting at 0x2600. */ +#define CS_GPR(n) (0x2600 + (n) * 8) + +/* Load the 64-bit value at `offset` in `bo` into the register pair + * reg / reg + 4, using two 32-bit MI_LOAD_REGISTER_MEM commands. */ +static void +emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg, + struct anv_bo *bo, uint32_t offset) +{ + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg, + .MemoryAddress = { bo, offset }); + anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM, + .RegisterAddress = reg + 4, + .MemoryAddress = { bo, offset + 4 }); +} + +void anv_CmdCopyQueryPoolResults( + VkCmdBuffer cmdBuffer, + VkQueryPool queryPool, + uint32_t startQuery, + uint32_t queryCount, + VkBuffer destBuffer, + VkDeviceSize destOffset, + VkDeviceSize destStride, + VkQueryResultFlags flags) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer); + ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); + ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer); + uint32_t slot_offset, dst_offset; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + /* Where is the availability info supposed to go? */ + anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT"); + return; + } + + assert(pool->type == VK_QUERY_TYPE_OCCLUSION); + + /* FIXME: If we're not waiting, should we just do this on the CPU? 
*/ + if (flags & VK_QUERY_RESULT_WAIT_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL, + .CommandStreamerStallEnable = true, + .StallAtPixelScoreboard = true); + + /* NOTE(review): dst_offset is a uint32_t, so the VkDeviceSize sum is + * truncated to 32 bits (compare the "lower 32 bits" FIXMEs below). */ + dst_offset = buffer->offset + destOffset; + for (uint32_t i = 0; i < queryCount; i++) { + + slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot); + + /* GPR0 <- slot.begin (offset 0), GPR1 <- slot.end (offset 8). */ + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset); + emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8); + + /* FIXME: We need to clamp the result for 32 bit. */ + + /* MI_MATH program: SRCA <- R1, SRCB <- R0, SUB, R2 <- ACCU; + * presumably R2 = end - begin, matching the CPU path in + * anv_GetQueryPoolResults(). */ + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH); + dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1); + dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0); + dw[3] = alu(OPCODE_SUB, 0, 0); + dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2), + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset }); + + /* The upper half of GPR2 is stored only for 64-bit results. */ + if (flags & VK_QUERY_RESULT_64_BIT) + anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM, + .RegisterAddress = CS_GPR(2) + 4, + /* FIXME: This is only lower 32 bits */ + .MemoryAddress = { buffer->bo, dst_offset + 4 }); + + dst_offset += destStride; + } +} diff --git a/src/vulkan/anv_util.c b/src/vulkan/anv_util.c new file mode 100644 index 00000000000..0311fbcd84f --- /dev/null +++ b/src/vulkan/anv_util.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> + +#include "anv_private.h" + +/** Log a printf-style error message; forwards to anv_loge_v(). */ +void anv_printflike(1, 2) +anv_loge(const char *format, ...) +{ + va_list va; + + va_start(va, format); + anv_loge_v(format, va); + va_end(va); +} + +/** + * va_list variant of anv_loge(): writes "vk: error: ", the formatted + * message and a newline to stderr. + * + * \see anv_loge() + */ +void +anv_loge_v(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); +} + +/** + * Print a "file:line: FINISHME: message" note to stderr. The formatted + * message is built in a 256-byte buffer, so longer messages are truncated. + */ +void anv_printflike(3, 4) +__anv_finishme(const char *file, int line, const char *format, ...) +{ + va_list ap; + char buffer[256]; + + va_start(ap, format); + vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); +} + +/** Printf-style wrapper around anv_abortfv(). */ +void anv_noreturn anv_printflike(1, 2) +anv_abortf(const char *format, ...) 
+{ + va_list va; + + va_start(va, format); + anv_abortfv(format, va); + va_end(va); +} + +/** Write "vk: error: ", the formatted message and a newline to stderr, then + * call abort(). */ +void anv_noreturn +anv_abortfv(const char *format, va_list va) +{ + fprintf(stderr, "vk: error: "); + vfprintf(stderr, format, va); + fprintf(stderr, "\n"); + abort(); +} + +/** + * Initialize `vector` with a malloc'ed buffer of `size` bytes holding + * elements of `element_size` bytes. Both sizes must be powers of two and + * element_size must be smaller than size. head and tail are byte counters + * that are masked with (size - 1) to index the buffer. + * + * Returns nonzero on success and 0 if the allocation failed. + */ +int +anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size) +{ + assert(util_is_power_of_two(size)); + assert(element_size < size && util_is_power_of_two(element_size)); + + vector->head = 0; + vector->tail = 0; + vector->element_size = element_size; + vector->size = size; + vector->data = malloc(size); + + return vector->data != NULL; +} + +/** + * Reserve room for one element at the head and return a pointer to it. + * When the buffer is full it is replaced by one twice the size and the live + * bytes are copied over. Returns NULL if that allocation fails (the vector + * is left unchanged in that case). + */ +void * +anv_vector_add(struct anv_vector *vector) +{ + uint32_t offset, size, split, tail; + void *data; + + if (vector->head - vector->tail == vector->size) { + size = vector->size * 2; + data = malloc(size); + if (data == NULL) + return NULL; + /* split: tail rounded up to a multiple of the old size, i.e. where + * the live range wraps in the old buffer. */ + split = align_u32(vector->tail, vector->size); + tail = vector->tail & (vector->size - 1); + if (vector->head - split < vector->size) { + /* Live range wraps: copy [tail, split) in place, then the wrapped + * part from the start of the old buffer to just past the old + * size. */ + memcpy(data + tail, + vector->data + tail, + split - vector->tail); + memcpy(data + vector->size, + vector->data, vector->head - split); + } else { + memcpy(data + tail, + vector->data + tail, + vector->head - vector->tail); + } + free(vector->data); + vector->data = data; + vector->size = size; + } + + assert(vector->head - vector->tail < vector->size); + + offset = vector->head & (vector->size - 1); + vector->head += vector->element_size; + + return vector->data + offset; +} + +/** + * Remove the element at the tail and return a pointer to its storage inside + * the buffer, or NULL if the vector is empty. + */ +void * +anv_vector_remove(struct anv_vector *vector) +{ + uint32_t offset; + + if (vector->head == vector->tail) + return NULL; + + assert(vector->head - vector->tail <= vector->size); + + offset = vector->tail & (vector->size - 1); + vector->tail += vector->element_size; + + return vector->data + offset; +} diff --git a/src/vulkan/anv_x11.c b/src/vulkan/anv_x11.c new file mode 100644 index 00000000000..9ffce8d8cbf --- /dev/null +++ b/src/vulkan/anv_x11.c @@ -0,0 +1,299 @@ +/* + * Copyright 
© 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_private.h" + +#include <xcb/xcb.h> +#include <xcb/dri3.h> +#include <xcb/present.h> + +/* Swap-chain formats reported by anv_GetDisplayInfoWSI(). */ +static const VkFormat formats[] = { + VK_FORMAT_B5G6R5_UNORM, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, +}; + +/** + * Query display information. Only + * VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI is handled; other types return + * VK_UNSUPPORTED. + * + * With pData == NULL the required size is stored in *pDataSize. Otherwise + * *pDataSize must be at least that size (else VK_ERROR_INVALID_VALUE) and + * one VkDisplayFormatPropertiesWSI per entry of `formats` is written. + * A NULL pDataSize returns VK_ERROR_INVALID_POINTER. + */ +VkResult anv_GetDisplayInfoWSI( + VkDisplayWSI display, + VkDisplayInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + VkDisplayFormatPropertiesWSI *properties = pData; + size_t size; + + if (pDataSize == NULL) + return VK_ERROR_INVALID_POINTER; + + switch (infoType) { + case VK_DISPLAY_INFO_TYPE_FORMAT_PROPERTIES_WSI: + size = sizeof(properties[0]) * ARRAY_SIZE(formats); + + if (pData == NULL) { + *pDataSize = size; + return VK_SUCCESS; + } + + if (*pDataSize < size) + return vk_error(VK_ERROR_INVALID_VALUE); + + *pDataSize = size; + + for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) + properties[i].swapChainFormat = formats[i]; + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +/* X11 swap chain: the XCB connection, target window and graphics context, + * followed by `count` per-image entries allocated together with the struct + * (see the size computation in anv_CreateSwapChainWSI()). */ +struct anv_swap_chain { + struct anv_device * device; + xcb_connection_t * conn; + xcb_window_t window; + xcb_gc_t gc; + VkExtent2D extent; + uint32_t count; + struct { + struct anv_image * image; + struct anv_device_memory * memory; + xcb_pixmap_t pixmap; + } images[0]; +}; + +VkResult anv_CreateSwapChainWSI( + VkDevice _device, + const VkSwapChainCreateInfoWSI* pCreateInfo, + VkSwapChainWSI* pSwapChain) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + struct anv_swap_chain *chain; + xcb_void_cookie_t cookie; + VkResult result; + size_t size; + int ret; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAP_CHAIN_CREATE_INFO_WSI); + + /* One allocation for the chain header plus imageCount image entries. */ + size = sizeof(*chain) + pCreateInfo->imageCount * sizeof(chain->images[0]); + chain = anv_device_alloc(device, size, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (chain == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + chain->device = device; + chain->conn = (xcb_connection_t *) pCreateInfo->pNativeWindowSystemHandle; + chain->window = (xcb_window_t) (uintptr_t) 
pCreateInfo->pNativeWindowHandle; + chain->count = pCreateInfo->imageCount; + chain->extent = pCreateInfo->imageExtent; + + /* For each swap-chain image: create an X-tiled 2D color image, allocate + * and bind memory for it, export the buffer as an fd and wrap it in an + * X pixmap through DRI3. */ + for (uint32_t i = 0; i < chain->count; i++) { + VkDeviceMemory memory_h; + VkImage image_h; + struct anv_image *image; + struct anv_surface *surface; + struct anv_device_memory *memory; + + /* Check the result: on failure image_h is not valid and must not be + * dereferenced below. */ + result = anv_image_create(_device, + &(struct anv_image_create_info) { + .force_tile_mode = true, + .tile_mode = XMAJOR, + .vk_info = + &(VkImageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .format = pCreateInfo->imageFormat, + .extent = { + .width = pCreateInfo->imageExtent.width, + .height = pCreateInfo->imageExtent.height, + .depth = 1 + }, + .mipLevels = 1, + .arraySize = 1, + .samples = 1, + /* FIXME: Need a way to use X tiling to allow scanout */ + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .flags = 0, + }}, + &image_h); + if (result != VK_SUCCESS) + goto fail; + + image = anv_image_from_handle(image_h); + surface = &image->primary_surface; + + /* Likewise, memory_h is only valid when the allocation succeeded. */ + result = anv_AllocMemory(_device, + &(VkMemoryAllocInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO, + .allocationSize = image->size, + .memoryTypeIndex = 0, + }, + &memory_h); + if (result != VK_SUCCESS) + goto fail; + + memory = anv_device_memory_from_handle(memory_h); + + anv_BindImageMemory(VK_NULL_HANDLE, anv_image_to_handle(image), + memory_h, 0); + + ret = anv_gem_set_tiling(device, memory->bo.gem_handle, + surface->stride, I915_TILING_X); + if (ret) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + int fd = anv_gem_handle_to_fd(device, memory->bo.gem_handle); + if (fd == -1) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + uint32_t bpp = 32; + uint32_t depth = 24; + xcb_pixmap_t pixmap = xcb_generate_id(chain->conn); + + cookie = + xcb_dri3_pixmap_from_buffer_checked(chain->conn, + pixmap, + chain->window, + image->size, + pCreateInfo->imageExtent.width, + pCreateInfo->imageExtent.height, + surface->stride, + depth, bpp, fd); + + chain->images[i].image = image; + 
chain->images[i].memory = memory; + chain->images[i].pixmap = pixmap; + image->swap_chain = chain; + + xcb_discard_reply(chain->conn, cookie.sequence); + } + + chain->gc = xcb_generate_id(chain->conn); + if (!chain->gc) { + result = vk_error(VK_ERROR_UNKNOWN); + goto fail; + } + + cookie = xcb_create_gc(chain->conn, + chain->gc, + chain->window, + XCB_GC_GRAPHICS_EXPOSURES, + (uint32_t []) { 0 }); + xcb_discard_reply(chain->conn, cookie.sequence); + + *pSwapChain = anv_swap_chain_to_handle(chain); + + return VK_SUCCESS; + + fail: + /* Release the chain allocation: its handle was never returned to the + * caller, so nothing else can free it. + * NOTE(review): images, memory objects, exported fds and pixmaps created + * by earlier loop iterations are still leaked on this path. */ + anv_device_free(device, chain); + + return result; +} + +/* Frees only the chain allocation itself. */ +VkResult anv_DestroySwapChainWSI( + VkSwapChainWSI _chain) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + anv_device_free(chain->device, chain); + + return VK_SUCCESS; +} + +/** + * Query swap-chain information. Only + * VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI is handled; other types + * return VK_UNSUPPORTED. + * + * *pDataSize receives the size of chain->count VkSwapChainImageInfoWSI + * entries. With pData == NULL only the size is reported; otherwise the + * caller's *pDataSize must be at least that size (else + * VK_ERROR_INVALID_VALUE) and the image/memory handle pairs are written. + */ +VkResult anv_GetSwapChainInfoWSI( + VkSwapChainWSI _chain, + VkSwapChainInfoTypeWSI infoType, + size_t* pDataSize, + void* pData) +{ + ANV_FROM_HANDLE(anv_swap_chain, chain, _chain); + + VkSwapChainImageInfoWSI *images; + size_t size; + + switch (infoType) { + case VK_SWAP_CHAIN_INFO_TYPE_PERSISTENT_IMAGES_WSI: + size = sizeof(*images) * chain->count; + if (pData && *pDataSize < size) + return VK_ERROR_INVALID_VALUE; + + *pDataSize = size; + if (!pData) + return VK_SUCCESS; + + images = pData; + for (uint32_t i = 0; i < chain->count; i++) { + images[i].image = anv_image_to_handle(chain->images[i].image); + images[i].memory = anv_device_memory_to_handle(chain->images[i].memory); + } + + return VK_SUCCESS; + + default: + return VK_UNSUPPORTED; + } +} + +VkResult anv_QueuePresentWSI( + VkQueue queue_, + const VkPresentInfoWSI* pPresentInfo) +{ + ANV_FROM_HANDLE(anv_image, image, pPresentInfo->image); + + struct anv_swap_chain *chain = image->swap_chain; + xcb_void_cookie_t cookie; + xcb_pixmap_t pixmap; + + assert(pPresentInfo->sType == VK_STRUCTURE_TYPE_PRESENT_INFO_WSI); + + if (chain == NULL) + return vk_error(VK_ERROR_INVALID_VALUE); + + pixmap = XCB_NONE; + for (uint32_t i = 0; i < chain->count; i++) { + if (image == 
chain->images[i].image) { + pixmap = chain->images[i].pixmap; + break; + } + } + + /* The image does not belong to its recorded swap chain. */ + if (pixmap == XCB_NONE) + return vk_error(VK_ERROR_INVALID_VALUE); + + /* Present by copying the image's pixmap onto the window over the full + * swap-chain extent, then flush the connection. */ + cookie = xcb_copy_area(chain->conn, + pixmap, + chain->window, + chain->gc, + 0, 0, + 0, 0, + chain->extent.width, + chain->extent.height); + xcb_discard_reply(chain->conn, cookie.sequence); + + xcb_flush(chain->conn); + + return VK_SUCCESS; +} diff --git a/src/vulkan/gen75_pack.h b/src/vulkan/gen75_pack.h new file mode 100644 index 00000000000..7602fb7bb76 --- /dev/null +++ b/src/vulkan/gen75_pack.h @@ -0,0 +1,8389 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for HSW. + * + * This file has been generated, do not hand edit. 
+ */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN75_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_VS_length 0x00000002 + +struct GEN75_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN75_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 30) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 0 + +#define GEN75_GPGPU_CSR_BASE_ADDRESS_length 0x00000002 + +/* Unpacked field values for GPGPU_CSR_BASE_ADDRESS; + * GEN75_GPGPU_CSR_BASE_ADDRESS_pack() encodes them into two dwords. */ +struct GEN75_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN75_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + /* The address dword is produced by the includer-supplied + * __gen_combine_address(), which also receives the dword's location. */ + dw[1] = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + +} + +#define GEN75_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_REGISTER_MEM_length 0x00000003 + +/* Unpacked field values for MI_STORE_REGISTER_MEM (3 dwords when packed). */ +struct GEN75_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void 
+GEN75_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN75_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN75_PIPELINE_SELECT_length 0x00000001 + +struct GEN75_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN75_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN75_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 8 + +#define GEN75_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN75_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN75_MEMORY_OBJECT_CONTROL_STATE { + uint32_t LLCeLLCCacheabilityControlLLCCC; + uint32_t 
L3CacheabilityControlL3CC; +}; + +static inline void +GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->LLCeLLCCacheabilityControlLLCCC, 1, 2) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN75_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + __gen_address_type SurfaceStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + __gen_address_type GeneralStateAccessUpperBound; + bool GeneralStateAccessUpperBoundModifyEnable; + __gen_address_type DynamicStateAccessUpperBound; + bool DynamicStateAccessUpperBoundModifyEnable; + __gen_address_type IndirectObjectAccessUpperBound; + bool IndirectObjectAccessUpperBoundModifyEnable; + __gen_address_type InstructionAccessUpperBound; + bool InstructionAccessUpperBoundModifyEnable; +}; + +static inline void +GEN75_STATE_BASE_ADDRESS_pack(__gen_user_data *data, 
void * restrict dst, + const struct GEN75_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw2 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw3 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + + uint32_t 
dw_IndirectObjectMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + + uint32_t dw_InstructionMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw5 = + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); + + uint32_t dw6 = + __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); + + uint32_t dw7 = + __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[7] = + __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); + + uint32_t dw8 = + __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); + + uint32_t dw9 = + __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[9] = + __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); + +} + +#define GEN75_STATE_PREFETCH_length_bias 0x00000002 +#define GEN75_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN75_STATE_PREFETCH_length 0x00000002 + +struct GEN75_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN75_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN75_STATE_SIP_length_bias 0x00000002 +#define GEN75_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 0 + +#define GEN75_STATE_SIP_length 0x00000002 + +struct GEN75_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN75_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 31) | + 0; + +} + +#define GEN75_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN75_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN75_SWTESS_BASE_ADDRESS_length 
0x00000002 + +struct GEN75_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN75_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + +} + +#define GEN75_3DPRIMITIVE_length_bias 0x00000002 +#define GEN75_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN75_3DPRIMITIVE_length 0x00000007 + +struct GEN75_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void 
+GEN75_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN75_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; +}; + +static inline void +GEN75_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN75_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN75_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; 
+ uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + 
uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + 
.DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + 
+} + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000003 + +struct GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + __gen_address_type BindingTablePoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + __gen_field(dw_SurfaceObjectControlState, 7, 10) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BindingTablePoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; +}; + +static inline void +GEN75_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define 
GEN75_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; +}; + +static inline void +GEN75_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN75_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN75_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN75_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN75_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN75_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN75_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN75_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN75_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_CLIP_length 0x00000004 + +struct GEN75_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 
1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN75_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 27, 27) | + 
__gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN75_3DSTATE_CONSTANT_BODY_length 0x00000006 + +struct GEN75_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + 
__gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN75_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_GS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct 
GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_HS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_PS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_CONSTANT_VS_length 0x00000007 + +struct GEN75_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN75_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN75_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN75_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +#define 
GEN75_3DSTATE_DEPTH_BUFFER_length 0x00000007 + +struct GEN75_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN75_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t 
dw_DepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN75_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + 
+struct GEN75_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN75_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN75_3DSTATE_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_DS_length 0x00000006 + +struct GEN75_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 
1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + 
__gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 55 + +#define GEN75_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN75_GATHER_CONSTANT_ENTRY_length 0x00000001 + +struct GEN75_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + +static inline void +GEN75_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + +struct GEN75_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 
16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 53 + +#define GEN75_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN75_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + 
uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN75_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | 
+ __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN75_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN75_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + 
._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_GATHER_POOL_ALLOC_length 0x00000003 + +struct GEN75_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type GatherPoolUpperBound; +}; + +static inline void +GEN75_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_mbo(4, 5) | + __gen_field(dw_MemoryObjectControlState, 0, 3) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->GatherPoolUpperBound, dw2); + +} + +#define GEN75_3DSTATE_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_GS_length 0x00000007 + +struct GEN75_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + 
uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + uint32_t GSaccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define REORDER_LEADING 0 +#define REORDER_TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool GSEnable; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + 
__gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->GSaccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN75_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN75_3DSTATE_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_HS_length 0x00000007 + +struct GEN75_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + bool 
Enable; + bool StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool HSaccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN75_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + __gen_field(values->MaximumNumberofThreads, 0, 7) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->HSaccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + 
__gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 12) | + 0; + +} + +#define GEN75_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_INDEX_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN75_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_MemoryObjectControlState, 12, 15) | + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define GEN75_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + 
.DwordLength = 1 + +#define GEN75_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN75_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN75_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN75_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN75_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN75_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN75_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict 
dst, + const struct GEN75_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN75_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_MULTISAMPLE_length 0x00000004 + +struct GEN75_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool MultiSampleEnable; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; +}; + +static inline void +GEN75_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + 
__gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MultiSampleEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN75_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN75_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN75_3DSTATE_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +#define GEN75_3DSTATE_PS_length 0x00000008 + +struct GEN75_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + 
uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadPriority; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t SampleMask; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; + bool PSAccessesUAV; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN75_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + 
__gen_field(values->DenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->SampleMask, 12, 19) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PSAccessesUAV, 5, 5) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct 
GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + 
__gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t 
*dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length_bias 0x00000002 +#define GEN75_3DSTATE_RAST_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_RAST_MULTISAMPLE_length 0x00000006 + +struct GEN75_3DSTATE_RAST_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NRM_NUMRASTSAMPLES_1 0 +#define NRM_NUMRASTSAMPLES_2 1 +#define NRM_NUMRASTSAMPLES_4 2 +#define NRM_NUMRASTSAMPLES_8 3 +#define NRM_NUMRASTSAMPLES_16 4 + uint32_t NumberofRasterizationMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; + float Sample11XOffset; + float Sample11YOffset; + float Sample10XOffset; + float Sample10YOffset; + float Sample9XOffset; + float Sample9YOffset; + float Sample8XOffset; + float Sample8YOffset; + float Sample15XOffset; + float Sample15YOffset; + float Sample14XOffset; + float Sample14YOffset; + float Sample13XOffset; + float Sample13YOffset; + float Sample12XOffset; + float Sample12YOffset; +}; + +static inline void +GEN75_3DSTATE_RAST_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_RAST_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->NumberofRasterizationMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + 
__gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[4] = + __gen_field(values->Sample11XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample11YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample10XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample10YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample9XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample9YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample8XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample8YOffset * (1 << 4), 0, 3) | + 0; + + dw[5] = + __gen_field(values->Sample15XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample15YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample14XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample14YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample13XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample13YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample12XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample12YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN75_PALETTE_ENTRY_length 0x00000001 + +struct GEN75_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + 
uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN75_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN75_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN75_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + 
._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SAMPLE_MASK_length 0x00000002 + /* Unpacked fields of 3DSTATE_SAMPLE_MASK (2 dwords when packed). */ +struct GEN75_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + /* Packs two dwords: dw[0] is the header, dw[1] carries SampleMask via __gen_field(..., 0, 7). The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 7) | + 0; + +} + /* 3DSTATE_SBE (sub-opcode 31): _length is 0xe (14 dwords); the default DwordLength of 12 equals _length minus _length_bias. */ +#define GEN75_3DSTATE_SBE_length_bias 0x00000002 +#define GEN75_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 12 + +#define GEN75_3DSTATE_SBE_length 0x0000000e + /* Unpacked fields of 3DSTATE_SBE. The #define lines inside the struct body are ordinary preprocessor macros; presumably each group names the legal values of the field declared right after it (confirm against the hardware documentation). */ +struct GEN75_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t AttributeSwizzleControlMode; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + bool Attribute2n1ComponentOverrideW; + bool Attribute2n1ComponentOverrideZ; + bool Attribute2n1ComponentOverrideY; + bool Attribute2n1ComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2n1ConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2n1SwizzleSelect; + uint32_t Attribute2n1SourceAttribute;
+ bool Attribute2nComponentOverrideW; + bool Attribute2nComponentOverrideZ; + bool Attribute2nComponentOverrideY; + bool Attribute2nComponentOverrideX; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t Attribute2nConstantSource; +#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t Attribute2nSwizzleSelect; + uint32_t Attribute2nSourceAttribute; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable310; + uint32_t Attribute7WrapShortestEnables; + uint32_t Attribute6WrapShortestEnables; + uint32_t Attribute5WrapShortestEnables; + uint32_t Attribute4WrapShortestEnables; + uint32_t Attribute3WrapShortestEnables; + uint32_t Attribute2WrapShortestEnables; + uint32_t Attribute1WrapShortestEnables; + uint32_t Attribute0WrapShortestEnables; + uint32_t Attribute15WrapShortestEnables; + uint32_t Attribute14WrapShortestEnables; + uint32_t Attribute13WrapShortestEnables; + uint32_t Attribute12WrapShortestEnables; + uint32_t Attribute11WrapShortestEnables; + uint32_t Attribute10WrapShortestEnables; + uint32_t Attribute9WrapShortestEnables; + uint32_t Attribute8WrapShortestEnables; +}; + /* Packs 3DSTATE_SBE into dst. NOTE(review): this routine assigns dw[0], dw[1], dw[2] and dw[10]..dw[13] only; dw[3]..dw[9] of the 14-dword command are never written here, and the struct carries just one Attribute2n/Attribute2n1 pair. Confirm that callers fill or zero the remaining dwords. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AttributeSwizzleControlMode, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | +
__gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + /* dw[2]: a single attribute pair, with the Attribute2n1* fields given positions 16..31 and the Attribute2n* fields positions 0..15 in their __gen_field calls. */ + dw[2] = + __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) | + __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) | + __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) | + __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) | + __gen_field(values->Attribute2n1ConstantSource, 25, 26) | + __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) | + __gen_field(values->Attribute2n1SourceAttribute, 16, 20) | + __gen_field(values->Attribute2nComponentOverrideW, 15, 15) | + __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) | + __gen_field(values->Attribute2nComponentOverrideY, 13, 13) | + __gen_field(values->Attribute2nComponentOverrideX, 12, 12) | + __gen_field(values->Attribute2nConstantSource, 9, 10) | + __gen_field(values->Attribute2nSwizzleSelect, 6, 7) | + __gen_field(values->Attribute2nSourceAttribute, 0, 4) | + 0; + /* NOTE(review): the index jumps from dw[2] to dw[10]; dw[3]..dw[9] are not assigned anywhere in this function. */ + dw[10] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[11] = + __gen_field(values->ConstantInterpolationEnable310, 0, 31) | + 0; + /* dw[12] and dw[13]: one 4-bit WrapShortestEnables field per attribute, attributes 0..7 in dw[12] and 8..15 in dw[13]. */ + dw[12] = + __gen_field(values->Attribute7WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute6WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute5WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute4WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute3WrapShortestEnables, 12, 15) | + __gen_field(values->Attribute2WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute1WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute0WrapShortestEnables, 0, 3) | + 0; + + dw[13] = + __gen_field(values->Attribute15WrapShortestEnables, 28, 31) | + __gen_field(values->Attribute14WrapShortestEnables, 24, 27) | + __gen_field(values->Attribute13WrapShortestEnables, 20, 23) | + __gen_field(values->Attribute12WrapShortestEnables, 16, 19) | + __gen_field(values->Attribute11WrapShortestEnables, 12, 15) | +
__gen_field(values->Attribute10WrapShortestEnables, 8, 11) | + __gen_field(values->Attribute9WrapShortestEnables, 4, 7) | + __gen_field(values->Attribute8WrapShortestEnables, 0, 3) | + 0; + +} + /* 3DSTATE_SCISSOR_STATE_POINTERS (sub-opcode 15): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + /* Packs two dwords: dw[0] is the header, dw[1] is ScissorRectPointer passed through __gen_offset(..., 5, 31). The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + /* 3DSTATE_SF (sub-opcode 19): _length is 7 dwords; the default DwordLength of 5 equals _length minus _length_bias. */ +#define GEN75_3DSTATE_SF_length_bias 0x00000002 +#define GEN75_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 5 + +#define GEN75_3DSTATE_SF_length 0x00000007 + /* Unpacked fields of 3DSTATE_SF. The #define lines inside the struct body are ordinary preprocessor macros; presumably each group names values for the field declared right after it (confirm against the hardware documentation). */ +struct GEN75_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define D32_FLOAT_S8X24_UINT 0 +#define D32_FLOAT 1 +#define D24_UNORM_S8_UINT 2 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t DepthBufferSurfaceFormat; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool GlobalDepthOffsetEnableSolid; + bool
GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool ViewTransformEnable; + uint32_t FrontWinding; + bool AntiAliasingEnable; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + float LineWidth; + uint32_t LineEndCapAntialiasingRegionWidth; + bool LineStippleEnable; + bool ScissorRectangleEnable; + bool RTIndependentRasterizationEnable; + uint32_t MultisampleRasterizationMode; + bool LastPixelEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + uint32_t VertexSubPixelPrecisionSelect; + uint32_t UsePointWidthState; + float PointWidth; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + /* Packs seven dwords, dw[0]..dw[6]. LineWidth and PointWidth are floats that are multiplied by (1 << 7) and (1 << 3) respectively before being handed to __gen_field -- presumably a fixed-point encoding; confirm against the hardware documentation. The three GlobalDepthOffset* floats go through __gen_float. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthBufferSurfaceFormat, 12, 14) | + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | +
__gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->ViewTransformEnable, 1, 1) | + __gen_field(values->FrontWinding, 0, 0) | + 0; + /* dw[2]: LineWidth is a float and is scaled by (1 << 7) before being placed with __gen_field(..., 18, 27); presumably a fixed-point encoding (confirm). */ + dw[2] = + __gen_field(values->AntiAliasingEnable, 31, 31) | + __gen_field(values->CullMode, 29, 30) | + __gen_field(values->LineWidth * (1 << 7), 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + __gen_field(values->LineStippleEnable, 14, 14) | + __gen_field(values->ScissorRectangleEnable, 11, 11) | + __gen_field(values->RTIndependentRasterizationEnable, 10, 10) | + __gen_field(values->MultisampleRasterizationMode, 8, 9) | + 0; + /* dw[3]: PointWidth is a float and is scaled by (1 << 3) before being placed with __gen_field(..., 0, 10). */ + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->UsePointWidthState, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | + 0; + /* dw[4]..dw[6]: the three GlobalDepthOffset floats, each converted with __gen_float. */ + dw[4] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[5] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[6] = + __gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + /* 3DSTATE_SO_BUFFER (opcode 1, sub-opcode 24): _length is 4 dwords; the default DwordLength of 2 equals _length minus _length_bias. */ +#define GEN75_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_SO_BUFFER_length 0x00000004 + +struct GEN75_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOBufferIndex; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + uint32_t SurfacePitch; +
__gen_address_type SurfaceBaseAddress; + __gen_address_type SurfaceEndAddress; +}; + /* Packs four dwords. SOBufferObjectControlState is first packed into a temporary dword with GEN75_MEMORY_OBJECT_CONTROL_STATE_pack and then inserted into dw[1]; dw[2] and dw[3] are produced by __gen_combine_address from SurfaceBaseAddress and SurfaceEndAddress with no extra bits OR-ed in (dw2 and dw3 are 0). */ +static inline void +GEN75_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 11) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3); + +} + /* 3DSTATE_SO_DECL_LIST (opcode 1, sub-opcode 23): the header macro sets no DwordLength and _length is 0. The two _length macros after it give the packed sizes of GEN75_SO_DECL_ENTRY (2 dwords) and GEN75_SO_DECL (1 dword). */ +#define GEN75_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN75_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN75_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN75_SO_DECL_ENTRY_length 0x00000002 + +#define GEN75_SO_DECL_length 0x00000001 + /* Unpacked fields of a single SO_DECL. */ +struct GEN75_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + /* Writes dw[0] only, built from the four fields. The data argument is not referenced. */ +static inline void +GEN75_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + /* One GEN75_SO_DECL member per stream, declared from stream 3 down to stream 0. */ +struct GEN75_SO_DECL_ENTRY { + struct
GEN75_SO_DECL Stream3Decl; + struct GEN75_SO_DECL Stream2Decl; + struct GEN75_SO_DECL Stream1Decl; + struct GEN75_SO_DECL Stream0Decl; +}; + /* Packs each of the four stream declarations into its own temporary dword with GEN75_SO_DECL_pack, combines them into one 64-bit value (Stream0Decl at 0..15 up to Stream3Decl at 48..63 in the __gen_field calls), then stores the low half in dw[0] and the high half in dw[1]. */ +static inline void +GEN75_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN75_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN75_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN75_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN75_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + /* Fixed-size part of 3DSTATE_SO_DECL_LIST; the trailing comment in the struct marks where the variable length data follows. */ +struct GEN75_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + /* Packs only the fixed part, dw[0]..dw[2]. DwordLength is placed with __gen_field(..., 0, 8) here, whereas the neighbouring commands use (0, 7). The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | +
__gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + /* dw[2]: one 8-bit NumEntries field per stream. */ + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_STENCIL_BUFFER (sub-opcode 6): _length is 3 dwords; the default DwordLength of 1 equals _length minus _length_bias. */ +#define GEN75_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN75_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_STENCIL_BUFFER_length 0x00000003 + +struct GEN75_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + /* Packs three dwords: the header; dw[1] with StencilBufferEnable, the separately packed StencilBufferObjectControlState and SurfacePitch; and dw[2] produced by __gen_combine_address from SurfaceBaseAddress with no extra bits OR-ed in (dw2 is 0). */ +static inline void +GEN75_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + /* 3DSTATE_STREAMOUT: length bias, followed by the default header initializers. */ +#define GEN75_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define
GEN75_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_STREAMOUT_length 0x00000003 + /* Unpacked fields of 3DSTATE_STREAMOUT (sub-opcode 30, 3 dwords when packed). */ +struct GEN75_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t RenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; + uint32_t SOBufferEnable3; + uint32_t SOBufferEnable2; + uint32_t SOBufferEnable1; + uint32_t SOBufferEnable0; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; +}; + /* Packs three dwords: the header, dw[1] with the enable and select fields, and dw[2] with a VertexReadOffset/VertexReadLength pair for each of the four streams. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->RenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->SOBufferEnable3, 11, 11) | + __gen_field(values->SOBufferEnable2, 10, 10) | + __gen_field(values->SOBufferEnable1, 9, 9) | + __gen_field(values->SOBufferEnable0, 8, 8) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24,
28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + +} + /* 3DSTATE_TE (sub-opcode 28): _length is 4 dwords; the default DwordLength of 2 equals _length minus _length_bias. */ +#define GEN75_3DSTATE_TE_length_bias 0x00000002 +#define GEN75_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN75_3DSTATE_TE_length 0x00000004 + /* Unpacked fields of 3DSTATE_TE. NOTE(review): the macros defined inside the struct body (INTEGER, POINT, QUAD, TRI, ...) carry no GEN75_ prefix and, being preprocessor definitions, stay visible for the rest of the translation unit. */ +struct GEN75_3DSTATE_TE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INTEGER 0 +#define ODD_FRACTIONAL 1 +#define EVEN_FRACTIONAL 2 + uint32_t Partitioning; +#define POINT 0 +#define OUTPUT_LINE 1 +#define OUTPUT_TRI_CW 2 +#define OUTPUT_TRI_CCW 3 + uint32_t OutputTopology; +#define QUAD 0 +#define TRI 1 +#define ISOLINE 2 + uint32_t TEDomain; +#define HW_TESS 0 +#define SW_TESS 1 + uint32_t TEMode; + bool TEEnable; + float MaximumTessellationFactorOdd; + float MaximumTessellationFactorNotOdd; +}; + /* Packs four dwords: the header, dw[1] with the mode fields, and dw[2]/dw[3] with the two maximum tessellation factors converted by __gen_float. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_TE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Partitioning, 12, 13) | + __gen_field(values->OutputTopology, 8, 9) | + __gen_field(values->TEDomain, 4, 5) | + __gen_field(values->TEMode, 1, 2) | + __gen_field(values->TEEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->MaximumTessellationFactorOdd) | + 0; + + dw[3] = +
__gen_float(values->MaximumTessellationFactorNotOdd) | + 0; + +} + /* 3DSTATE_URB_DS (sub-opcode 50): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_URB_DS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 50, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_DS_length 0x00000002 + +struct GEN75_3DSTATE_URB_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + /* Packs two dwords: the header, then dw[1] with the starting address (25, 30), entry allocation size (16, 24) and number of entries (0, 15) as given to __gen_field. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 30) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + /* 3DSTATE_URB_GS (sub-opcode 51): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_GS_length 0x00000002 + +struct GEN75_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + /* Packs the GS URB command into dst; the data argument is not referenced in the body. */ +static inline void +GEN75_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = +
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 30) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + /* 3DSTATE_URB_HS (sub-opcode 49): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN75_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_URB_HS_length 0x00000002 + +struct GEN75_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + /* Packs two dwords: the header, then dw[1] with the starting address (25, 30), entry allocation size (16, 24) and number of entries (0, 15) as given to __gen_field. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 30) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + /* 3DSTATE_VERTEX_BUFFERS (sub-opcode 8): the header macro sets no DwordLength and _length is 0. */ +#define GEN75_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN75_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + /* VERTEX_BUFFER_STATE occupies 4 dwords when packed. */ +#define GEN75_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN75_VERTEX_BUFFER_STATE { + uint32_t
VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + /* Packs four dwords. The memory object control state is first packed into a temporary dword and inserted into dw[0]; dw[1] and dw[2] are produced by __gen_combine_address from BufferStartingAddress and EndAddress with no extra bits OR-ed in (dw1 and dw2 are 0); dw[3] holds InstanceDataStepRate. */ +static inline void +GEN75_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + /* Fixed-size part of 3DSTATE_VERTEX_BUFFERS: header fields only; the trailing comment in the struct marks where the variable length data follows. */ +struct GEN75_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + /* Packs the header dword dw[0] only; nothing after it is written by this function. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27,
28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_VERTEX_ELEMENTS (sub-opcode 9): the header macro sets no DwordLength and _length is 0; VERTEX_ELEMENT_STATE occupies 2 dwords when packed. */ +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN75_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define GEN75_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN75_VERTEX_ELEMENT_STATE_length 0x00000002 + /* Unpacked fields of one VERTEX_ELEMENT_STATE. */ +struct GEN75_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + /* Packs two dwords: dw[0] with the buffer index, valid flag, format, edge flag and offset; dw[1] with the four component controls. The data argument is not referenced. */ +static inline void +GEN75_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + /* Fixed-size part of 3DSTATE_VERTEX_ELEMENTS: header fields only; the trailing comment in the struct marks where the variable length data follows. */ +struct GEN75_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + /* Packs the header dword dw[0] only; nothing after it is written by this function. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29,
31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_VF (sub-opcode 12): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_VF_length_bias 0x00000002 +#define GEN75_3DSTATE_VF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 12, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VF_length 0x00000002 + +struct GEN75_3DSTATE_VF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndexedDrawCutIndexEnable; + uint32_t DwordLength; + uint32_t CutIndex; +}; + /* Packs two dwords. Besides the usual header fields, dw[0] also carries IndexedDrawCutIndexEnable at (8, 8); dw[1] holds the full 32-bit CutIndex. The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CutIndex, 0, 31) | + 0; + +} + /* 3DSTATE_VF_STATISTICS (CommandSubType 1, sub-opcode 11): a single-dword command; _length and _length_bias are both 1 and there is no DwordLength field. */ +#define GEN75_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN75_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 11 + +#define GEN75_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN75_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + /* Packs the single dword dw[0]: the opcode fields plus StatisticsEnable at (0, 0). The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29,
31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + /* 3DSTATE_VIEWPORT_STATE_POINTERS_CC (sub-opcode 35): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + /* Packs two dwords: dw[0] is the header, dw[1] is CCViewportPointer passed through __gen_offset(..., 5, 31). The data argument is not referenced. */ +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + /* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (sub-opcode 33): length bias, default header initializers, total length (2 dwords) and the unpacked field struct. */ +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + /* Packs the SF_CLIP viewport pointer command into dst. */ +static inline void +GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data,
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remainder of GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack():
 *    dw[0] is the command header, dw[1] carries SFClipViewportPointer through
 *    __gen_offset(..., 6, 31);
 *  - GEN75_3DSTATE_VS: header defaults (CommandType 3, CommandSubType 3,
 *    _3DCommandOpcode 0, _3DCommandSubOpcode 16, DwordLength 4), length 6 and
 *    the value struct.  The #define lines interleaved with the struct members
 *    name the values accepted by the member declared right after them
 *    (for example NoSamplers.._1316Samplers precede SamplerCount);
 *  - the opening of GEN75_3DSTATE_VS_pack(), up to "dw[0] =", whose operands
 *    continue on the following line.
 */
void * restrict dst, + const struct GEN75_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN75_3DSTATE_VS_length_bias 0x00000002 +#define GEN75_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +#define GEN75_3DSTATE_VS_length 0x00000006 + +struct GEN75_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool VSaccessesUAV; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; +}; + +static inline void +GEN75_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the body of GEN75_3DSTATE_VS_pack(): six dwords are written.  dw[0] is
 *    the command header, dw[1] the KernelStartPointer (__gen_offset 6..31),
 *    dw[2] dispatch/sampler/exception controls, dw[3] the scratch space base
 *    offset (bits 10..31) and per-thread scratch size (bits 0..3), dw[4] the
 *    URB read layout and dw[5] the thread limit plus enable bits;
 *  - GEN75_3DSTATE_WM: header defaults (CommandType 3, CommandSubType 3,
 *    _3DCommandOpcode 0, _3DCommandSubOpcode 20, DwordLength 1), length 3 and
 *    the first members of the value struct, which continues on the following
 *    line.
 */
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->VSaccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN75_3DSTATE_WM_length_bias 0x00000002 +#define GEN75_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +#define GEN75_3DSTATE_WM_length 0x00000003 + +struct GEN75_3DSTATE_WM { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remaining members of struct GEN75_3DSTATE_WM.  Each group of
 *    #define lines (PSCDEPTH_*, EDSC_*, INTERP_*, RASTRULE_*, MSRASTMODE_*,
 *    MSDISPMODE_*, OFF/ON) names the values of the member declared right
 *    after it.  NOTE(review): these are plain file-scope macros, and short
 *    names such as OFF and ON may collide with other headers;
 *  - the opening of GEN75_3DSTATE_WM_pack(): dw[0] is the command header and
 *    dw[1] starts with the seven single-bit enables in bits 31..25; the
 *    remaining dw[1] operands continue on the following line.
 */
+#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + bool PixelShaderUsesInputCoverageMask; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + bool RTIndependentRasterizationEnable; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +#define OFF 0 +#define ON 1 + uint32_t PSUAVonly; +}; + +static inline void +GEN75_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + __gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->PixelShaderKillPixel, 25, 25) | + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remainder of GEN75_3DSTATE_WM_pack(): the lower bit-fields of dw[1]
 *    (bits 24..0) and dw[2], which holds MultisampleDispatchMode (bit 31) and
 *    PSUAVonly (bit 30);
 *  - GEN75_GPGPU_OBJECT: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 1, SubOpcode 4, DwordLength 6), length 8 and the
 *    value struct, including the Slice0/Slice1 and HalfSlice* selector
 *    macros;
 *  - the opening of the GEN75_GPGPU_OBJECT_pack() signature, which continues
 *    on the following line.
 */
__gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->RTIndependentRasterizationEnable, 5, 5) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + __gen_field(values->PSUAVonly, 30, 30) | + 0; + +} + +#define GEN75_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN75_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +#define GEN75_GPGPU_OBJECT_length 0x00000008 + +struct GEN75_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define Slice0 0 +#define Slice1 1 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void +GEN75_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the rest of GEN75_GPGPU_OBJECT_pack(): eight dwords are written.  dw[0]
 *    is the command header (PredicateEnable at bit 8), dw[1] and dw[2] hold
 *    the descriptor offset, shared-local-memory and slice selection fields
 *    plus IndirectDataLength (bits 0..16), dw[3] the IndirectDataStartAddress
 *    through __gen_offset(..., 0, 31), dw[4..6] the thread group X/Y/Z ids
 *    and dw[7] the ExecutionMask;
 *  - GEN75_GPGPU_WALKER: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 1, SubOpcodeA 5, DwordLength 9), length 0xb and the
 *    first members of the value struct (SIMD8/SIMD16/SIMD32 name the SIMDSize
 *    values); the struct continues on the following line.
 */
GEN75_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_GPGPU_WALKER_length_bias 0x00000002 +#define GEN75_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +#define GEN75_GPGPU_WALKER_length 0x0000000b + +struct GEN75_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t ThreadGroupIDStartingZ; + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the last members of struct GEN75_GPGPU_WALKER;
 *  - GEN75_GPGPU_WALKER_pack(): eleven dwords are written.  dw[0] is the
 *    command header (IndirectParameterEnable at bit 10, PredicateEnable at
 *    bit 8), dw[1] the InterfaceDescriptorOffset, dw[2] the SIMD size and the
 *    three thread counter maxima, dw[3..8] the starting id and dimension for
 *    X, Y and Z, and dw[9]/dw[10] the right and bottom execution masks;
 *  - GEN75_MEDIA_CURBE_LOAD: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 0, SubOpcode 1, DwordLength 2), length 4 and the
 *    start of the value struct, which continues on the following line.
 */
uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN75_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN75_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN75_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + uint32_t 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the last member of struct GEN75_MEDIA_CURBE_LOAD and
 *    GEN75_MEDIA_CURBE_LOAD_pack(): four dwords, where dw[1] is written as
 *    zero, dw[2] holds CURBETotalDataLength (bits 0..16) and dw[3] holds
 *    CURBEDataStartAddress through __gen_field(..., 0, 31).
 *    NOTE(review): the sibling command below packs its start address with
 *    __gen_offset instead; confirm which helper is intended here;
 *  - GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD: header defaults (CommandType 3,
 *    Pipeline 2, MediaCommandOpcode 0, SubOpcode 2, DwordLength 2), length 4,
 *    the value struct and its _pack() helper (dw[1] zero, dw[2] total length
 *    in bits 0..16, dw[3] start address through __gen_offset(..., 0, 31));
 *  - the length_bias macro and the first entries of GEN75_MEDIA_OBJECT_header,
 *    which continues on the following line.
 */
CURBEDataStartAddress; +}; + +static inline void +GEN75_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN75_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remaining entries of GEN75_MEDIA_OBJECT_header (MediaCommandOpcode 1,
 *    MediaCommandSubOpcode 0; no DwordLength default is provided) and the
 *    length macro, which is 0 because variable length fields follow the
 *    fixed part of the command;
 *  - struct GEN75_MEDIA_OBJECT.  ScoreboardMask is declared uint32_t rather
 *    than bool: the pack helper stores it in the 8-bit field at bits 0..7 of
 *    dw[5], and a bool member would reduce every non-zero mask to 1 before it
 *    is packed.  This also matches GEN75_MEDIA_VFE_STATE, which declares its
 *    ScoreboardMask as uint32_t for the same bit range.
 *    NOTE(review): the member name "ScoredboardY" looks like a misspelling of
 *    ScoreboardY; it is left unchanged because callers refer to it by name;
 *  - the opening of GEN75_MEDIA_OBJECT_pack(): dw[0] command header, dw[1]
 *    descriptor offset, dw[2] dispatch controls and IndirectDataLength, dw[3]
 *    the relocated IndirectDataStartAddress via __gen_combine_address();
 *    the dw[4] operands continue on the following line.
 */
2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN75_MEDIA_OBJECT_length 0x00000000 + +struct GEN75_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define EitherSlice 0 + uint32_t SliceDestinationSelect; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + uint32_t ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 19) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remainder of GEN75_MEDIA_OBJECT_pack(): dw[4] holds the scoreboard
 *    Y (bits 16..24) and X (bits 0..8) values, dw[5] the ScoreboardColor
 *    (bits 16..19) and ScoreboardMask (bits 0..7); nothing is written for the
 *    variable length part;
 *  - GEN75_MEDIA_OBJECT_PRT: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 1, SubOpcode 2, DwordLength 14), length 0x10, the
 *    value struct and its _pack() helper.  The helper writes the header in
 *    dw[0], the descriptor offset in dw[1], ChildrenPresent and the two PRT
 *    fence fields in dw[2], zero in dw[3], and then copies the twelve
 *    InlineData entries into dw[4]..dw[15];
 *  - GEN75_MEDIA_OBJECT_WALKER: header defaults (SubOpcode 3, no DwordLength
 *    default), length 0 and the first member of the value struct, which
 *    continues on the following line.
 */
__gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN75_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN75_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN75_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN75_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN75_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN75_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN75_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the members of struct GEN75_MEDIA_OBJECT_WALKER after CommandType.
 *    ScoreboardMask is declared uint32_t rather than bool: the pack helper
 *    stores it in the 8-bit field at bits 0..7 of dw[5], and a bool member
 *    would reduce every non-zero mask to 1 before it is packed.  This also
 *    matches GEN75_MEDIA_VFE_STATE, which declares its ScoreboardMask as
 *    uint32_t for the same bit range.  DualMode, Repel and QuadMode stay bool
 *    because each occupies a single bit;
 *  - the opening of GEN75_MEDIA_OBJECT_WALKER_pack(): dw[0] command header,
 *    dw[1] descriptor offset, dw[2] ChildrenPresent, ThreadSynchronization,
 *    UseScoreboard and IndirectDataLength; the dw[3] operand continues on the
 *    following line.
 */
uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ScoreboardMask; + bool DualMode; + bool Repel; + bool QuadMode; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN75_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remainder of GEN75_MEDIA_OBJECT_WALKER_pack(): dw[3] holds the
 *    IndirectDataStartAddress through __gen_offset(..., 0, 31); dw[4] and
 *    dw[10] are written as zero; dw[5] holds ScoreboardMask (bits 0..7);
 *    dw[6] the mode bits, colour count and mid-loop controls; dw[7] the loop
 *    execution counts; dw[8], dw[9] and dw[11..16] the Y/X pairs for block
 *    resolution, local start, the local stride and unit values, and the
 *    global resolution, start, stride and unit values.  Nothing is written
 *    for the variable length part;
 *  - GEN75_MEDIA_STATE_FLUSH: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 0, SubOpcode 4, DwordLength 0), length 2 and the
 *    first members of the value struct, which continues on the following
 *    line.
 */
__gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->QuadMode, 29, 29) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN75_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN75_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN75_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remaining members of struct GEN75_MEDIA_STATE_FLUSH and
 *    GEN75_MEDIA_STATE_FLUSH_pack(): dw[0] is the command header and dw[1]
 *    holds DisablePreemption (bit 8), FlushtoGO (bit 7), WatermarkRequired
 *    (bit 6) and InterfaceDescriptorOffset (bits 0..5);
 *  - GEN75_MEDIA_VFE_STATE: header defaults (CommandType 3, Pipeline 2,
 *    MediaCommandOpcode 0, SubOpcode 0, DwordLength 6), length 8 and most of
 *    the value struct.  The long #define names spell out the meaning of the
 *    0/1 values of the member declared right after them.  The struct
 *    continues on the following line.
 */
SubOpcode; + uint32_t DwordLength; + bool DisablePreemption; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN75_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->DisablePreemption, 8, 8) | + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN75_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN75_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +#define GEN75_MEDIA_VFE_STATE_length 0x00000008 + +struct GEN75_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t GPGPUMode; + uint32_t HalfSliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remaining members of struct GEN75_MEDIA_VFE_STATE: ScoreboardMask
 *    and the sixteen Scoreboard<N>Delta{X,Y} values for N = 0..7;
 *  - most of GEN75_MEDIA_VFE_STATE_pack(): dw[0] command header; dw[1]
 *    scratch space base pointer (__gen_offset 10..31), stack size and
 *    per-thread scratch size; dw[2] thread and URB entry counts plus gateway
 *    and GPGPU mode bits; dw[3] HalfSliceDisable; dw[4] the URB entry and
 *    CURBE allocation sizes; dw[5] scoreboard enable, type and 8-bit mask;
 *    dw[6] starts the 4-bit delta fields for scoreboards 3..0 and continues
 *    on the following line.
 */
ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN75_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + __gen_field(values->HalfSliceDisable, 0, 1) | + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the remainder of GEN75_MEDIA_VFE_STATE_pack(): the low fields of dw[6]
 *    and dw[7], which packs the 4-bit deltas for scoreboards 7..4;
 *  - GEN75_MI_ARB_CHECK: header defaults (CommandType 0, MICommandOpcode 5),
 *    length 1, the value struct and a _pack() helper that writes only dw[0]
 *    (CommandType in bits 29..31, MICommandOpcode in bits 23..28);
 *  - GEN75_MI_ARB_ON_OFF: header defaults (MICommandOpcode 8), length 1, the
 *    value struct and a _pack() helper that adds ArbitrationEnable at bit 0;
 *  - the length_bias and header macros of GEN75_MI_BATCH_BUFFER_END
 *    (MICommandOpcode 10); its length macro starts at the end of this line
 *    and is completed on the following line.
 */
__gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN75_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN75_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN75_MI_ARB_CHECK_length 0x00000001 + +struct GEN75_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN75_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +#define GEN75_MI_ARB_ON_OFF_length 0x00000001 + +struct GEN75_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ArbitrationEnable; +}; + +static inline void +GEN75_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN75_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the length macro (1), value struct and _pack() helper of
 *    GEN75_MI_BATCH_BUFFER_END; the helper writes only dw[0] (CommandType and
 *    MICommandOpcode);
 *  - GEN75_MI_BATCH_BUFFER_START: header defaults (CommandType 0,
 *    MICommandOpcode 49, DwordLength 0), length 2, the value struct
 *    (_1stlevelbatch/_2ndlevelbatch and ASI_GGTT/ASI_PPGTT name the values of
 *    the members declared after them) and most of its _pack() helper.  dw[0]
 *    carries the header and control bits; dw[1] is produced by
 *    __gen_combine_address() from BatchBufferStartAddress, and that call is
 *    completed on the following line.
 */
GEN75_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN75_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN75_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +#define GEN75_MI_BATCH_BUFFER_START_length 0x00000002 + +struct GEN75_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + bool AddOffsetEnable; + bool PredicationEnable; + uint32_t NonPrivileged; + bool ClearCommandBufferEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN75_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->NonPrivileged, 13, 13) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the final argument and closing brace of
 *    GEN75_MI_BATCH_BUFFER_START_pack();
 *  - GEN75_MI_CLFLUSH: header defaults (CommandType 0, MICommandOpcode 39, no
 *    DwordLength default), length 0 because variable length fields follow,
 *    the value struct and its _pack() helper.  dw[0] is the header
 *    (UseGlobalGTT at bit 22, DwordLength in bits 0..9).  dw[1] combines
 *    PageBaseAddress with StartingCachelineOffset (bits 6..11) through
 *    __gen_combine_address().  dw[2] is PageBaseAddressHigh, also through
 *    __gen_combine_address();
 *  - GEN75_MI_CONDITIONAL_BATCH_BUFFER_END: header defaults (MICommandOpcode
 *    54, UseGlobalGTT 0, CompareSemaphore 0, DwordLength 0), length 2, the
 *    value struct and the opening of its _pack() helper, which continues on
 *    the following line.
 */
dw1); + +} + +#define GEN75_MI_CLFLUSH_length_bias 0x00000002 +#define GEN75_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN75_MI_CLFLUSH_length 0x00000000 + +struct GEN75_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +#define GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + +struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + 
/*
 * Flattened patch text: each " + " appears to mark the start of one added
 * source line.  This physical line contains, in order:
 *  - the body of GEN75_MI_CONDITIONAL_BATCH_BUFFER_END_pack(): dw[0] header
 *    (UseGlobalGTT bit 22, CompareSemaphore bit 21), dw[1] the
 *    CompareDataDword and dw[2] the CompareAddress through
 *    __gen_combine_address().
 *    NOTE(review): three dwords are written although the length macro for
 *    this command is 2; confirm against the hardware documentation;
 *  - GEN75_MI_FLUSH: header defaults (CommandType 0, MICommandOpcode 4),
 *    length 1, the value struct (the DontReset/Reset, Flush/DontFlush and
 *    DontInvalidate/Invalidate macros name the values of the bool declared
 *    after them) and a _pack() helper that writes only dw[0], with the five
 *    flags in bits 5..1;
 *  - GEN75_MI_LOAD_REGISTER_IMM: header defaults (MICommandOpcode 34,
 *    DwordLength 1), length 3 and the first members of the value struct,
 *    which continues on the following line.
 */
uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN75_MI_FLUSH_length_bias 0x00000001 +#define GEN75_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +#define GEN75_MI_FLUSH_length 0x00000001 + +struct GEN75_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IndirectStatePointersDisable; + bool GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + bool GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + bool RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + bool StateInstructionCacheInvalidate; +}; + +static inline void +GEN75_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t 
RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN75_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_MEM_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN75_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + .MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN75_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + 
uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN75_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN75_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN75_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN75_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN75_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN75_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + 
uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN75_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN75_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 1 + +#define GEN75_MI_LOAD_URB_MEM_length 0x00000003 + +struct GEN75_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_MATH_length_bias 0x00000002 +#define GEN75_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN75_MI_MATH_length 0x00000000 + +struct GEN75_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; 
+ /* variable length fields follow */ +}; + +static inline void +GEN75_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN75_MI_NOOP_length_bias 0x00000001 +#define GEN75_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN75_MI_NOOP_length 0x00000001 + +struct GEN75_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN75_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN75_MI_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN75_MI_PREDICATE_length 0x00000001 + +struct GEN75_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN75_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN75_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN75_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN75_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN75_MI_REPORT_HEAD_length 0x00000001 + +struct GEN75_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN75_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN75_MI_RS_CONTEXT_length 0x00000001 + +struct GEN75_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN75_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN75_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN75_MI_RS_CONTROL_length 0x00000001 + +struct GEN75_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static 
inline void +GEN75_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN75_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN75_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN75_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN75_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN75_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 0; + + uint32_t dw2 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->DestinationAddress, dw2); + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN75_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN75_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +#define GEN75_MI_SEMAPHORE_MBOX_length 0x00000003 + +struct GEN75_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RVESYNC 1 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t GeneralRegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN75_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * 
restrict dst, + const struct GEN75_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->GeneralRegisterSelect, 8, 13) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN75_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN75_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN75_MI_SET_CONTEXT_length 0x00000002 + +struct GEN75_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN75_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN75_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN75_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1, \ + .PREDICATEENABLE = 6 + +#define 
GEN75_MI_SET_PREDICATE_length 0x00000001 + +struct GEN75_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PredicateAlways 0 +#define PredicateonClear 1 +#define PredicateonSet 2 +#define PredicateDisable 3 + bool PREDICATEENABLE; +}; + +static inline void +GEN75_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 1) | + 0; + +} + +#define GEN75_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN75_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN75_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN75_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN75_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN75_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN75_MI_STORE_DATA_INDEX { + uint32_t 
CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN75_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN75_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN75_MI_STORE_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 45, \ + .DwordLength = 1 + +#define GEN75_MI_STORE_URB_MEM_length 0x00000003 + +struct GEN75_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN75_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN75_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN75_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN75_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN75_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN75_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const 
struct GEN75_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN75_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN75_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN75_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN75_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN75_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN75_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN75_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN75_MI_UPDATE_GTT_length 0x00000000 + +struct GEN75_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN75_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define 
GEN75_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN75_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN75_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN75_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN75_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN75_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN75_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + +#define GEN75_MI_URB_CLEAR_length 0x00000002 + +struct GEN75_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN75_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN75_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN75_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN75_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN75_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN75_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + 
const struct GEN75_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN75_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN75_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN75_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN75_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN75_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + 
__gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN75_PIPE_CONTROL_length_bias 0x00000002 +#define GEN75_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +#define GEN75_PIPE_CONTROL_length 0x00000005 + +struct GEN75_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 
0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +static inline void +GEN75_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + +} + 
+#define GEN75_SCISSOR_RECT_length 0x00000002 + +struct GEN75_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN75_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN75_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN75_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN75_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] 
= + 0; + } + +} + +#define GEN75_BLEND_STATE_length 0x00000002 + +struct GEN75_BLEND_STATE { + bool ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define 
LOGICOP_SET 15 + uint32_t LogicOpFunction; + bool AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +}; + +static inline void +GEN75_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + __gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + 
__gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +#define GEN75_CC_VIEWPORT_length 0x00000002 + +struct GEN75_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN75_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN75_COLOR_CALC_STATE_length 0x00000006 + +struct GEN75_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN75_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define 
GEN75_DEPTH_STENCIL_STATE_length 0x00000003 + +struct GEN75_DEPTH_STENCIL_STATE { + bool StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + bool DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + bool DepthBufferWriteEnable; +}; + +static inline void 
+GEN75_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + __gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + __gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +#define GEN75_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN75_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t 
ConstantURBEntryReadLength; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN75_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + + dw[7] = + 0; + +} + +#define GEN75_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN75_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN75_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + +#define GEN75_RENDER_SURFACE_STATE_length 
0x00000008 + +struct GEN75_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t IntegerSurfaceFormat; + uint32_t SurfacePitch; +#define RTROTATE_0DEG 0 +#define RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + struct GEN75_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + __gen_address_type MCSBaseAddress; + uint32_t MCSSurfacePitch; + __gen_address_type AppendCounterAddress; + bool AppendCounterEnable; + bool MCSEnable; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + uint32_t ShaderChannelSelectR; + 
uint32_t ShaderChannelSelectG; + uint32_t ShaderChannelSelectB; + uint32_t ShaderChannelSelectA; + float ResourceMinLOD; +}; + /* Encode *values into the dword array at dst; SurfaceBaseAddress is emitted through __gen_combine_address. */ +static inline void +GEN75_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 5) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->IntegerSurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + __gen_field(values->MinimumArrayElement0, 0, 26) | /* the 0..26 slot has its own struct member; reusing MinimumArrayElement here ORed it over the fields above */ + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN75_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + dw[5] = + __gen_offset(values->XOffset, 25, 31) | +
__gen_offset(values->YOffset, 20, 23) | + __gen_field(dw_SurfaceObjectControlState, 16, 19) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + uint32_t dw6 = + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + + dw[7] = + __gen_field(values->ShaderChannelSelectR, 25, 27) | + __gen_field(values->ShaderChannelSelectG, 22, 24) | + __gen_field(values->ShaderChannelSelectB, 19, 21) | + __gen_field(values->ShaderChannelSelectA, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + +} + +#define GEN75_SAMPLER_BORDER_COLOR_STATE_length 0x00000014 + +#define GEN75_BORDER_COLOR_UINT32_SINT32_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT32_SINT32 { + uint32_t BorderColorRedui32integerunclamp; + uint32_t BorderColorRedsi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp; + uint32_t BorderColorGreensi32integerunclamp; + uint32_t BorderColorBlueui32integerunclamp; + uint32_t BorderColorBluesi32integerunclamp; + uint32_t BorderColorGreenui32integerunclamp0; + uint32_t BorderColorGreensi32integerunclamp0; + uint32_t BorderColorAlphaui32integerunclamp; + uint32_t BorderColorAlphasi32integerunclamp; +}; + +static inline void +GEN75_BORDER_COLOR_UINT32_SINT32_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT32_SINT32 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorRedui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorRedsi32integerunclamp, 0, 31) | + 0; + + dw[1] = + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[2] 
= + __gen_field(values->BorderColorBlueui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorBluesi32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreenui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorGreensi32integerunclamp, 0, 31) | + 0; + + dw[3] = + __gen_field(values->BorderColorAlphaui32integerunclamp, 0, 31) | + __gen_field(values->BorderColorAlphasi32integerunclamp, 0, 31) | + 0; + +} + +#define GEN75_BORDER_COLOR_UINT16_SINT16_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT16_SINT16 { + uint32_t BorderColorGreenclamptouint16; + uint32_t BorderColorGreenclamptosint16; + uint32_t BorderColorRedclamptouint16; + uint32_t BorderColorRedclamptosint16; + uint32_t BorderColorAlphaclamptouint16; + uint32_t BorderColorAlphaclamptosint16; + uint32_t BorderColorBlueclamptouint16; + uint32_t BorderColorBlueclamptosint16; +}; + +static inline void +GEN75_BORDER_COLOR_UINT16_SINT16_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT16_SINT16 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorGreenclamptouint16, 16, 31) | + __gen_field(values->BorderColorGreenclamptosint16, 16, 31) | + __gen_field(values->BorderColorRedclamptouint16, 0, 15) | + __gen_field(values->BorderColorRedclamptosint16, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->BorderColorAlphaclamptouint16, 16, 31) | + __gen_field(values->BorderColorAlphaclamptosint16, 16, 31) | + __gen_field(values->BorderColorBlueclamptouint16, 0, 15) | + __gen_field(values->BorderColorBlueclamptosint16, 0, 15) | + 0; + + dw[3] = + 0; + +} + +#define GEN75_BORDER_COLOR_UINT8_SINT8_length 0x00000004 + +struct GEN75_BORDER_COLOR_UINT8_SINT8 { + uint32_t BorderColorAlphaclamptouint8; + uint32_t BorderColorAlphaclamptosint8; + uint32_t BorderColorBlueclamptouint8; + uint32_t BorderColorBlueclamptosint8; + uint32_t BorderColorGreenclamptouint8; + uint32_t 
BorderColorGreenclamptosint8; + uint32_t BorderRedAlphaclamptouint8; + uint32_t BorderRedAlphaclamptosint8; +}; + /* Encode *values into four dwords at dst: all four channels share dw[0]; dw[1..3] are written as zero. */ +static inline void +GEN75_BORDER_COLOR_UINT8_SINT8_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_BORDER_COLOR_UINT8_SINT8 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BorderColorAlphaclamptouint8, 24, 31) | + __gen_field(values->BorderColorAlphaclamptosint8, 24, 31) | + __gen_field(values->BorderColorBlueclamptouint8, 16, 23) | + __gen_field(values->BorderColorBlueclamptosint8, 16, 23) | + __gen_field(values->BorderColorGreenclamptouint8, 8, 15) | + __gen_field(values->BorderColorGreenclamptosint8, 8, 15) | + __gen_field(values->BorderRedAlphaclamptouint8, 0, 7) | + __gen_field(values->BorderRedAlphaclamptosint8, 0, 7) | + 0; + + dw[1] = + 0; + + dw[2] = + 0; + + dw[3] = + 0; + +} + +struct GEN75_SAMPLER_BORDER_COLOR_STATE { + float BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t BorderColorRedDX9; + float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; + struct GEN75_BORDER_COLOR_UINT32_SINT32 BorderColor; + struct GEN75_BORDER_COLOR_UINT16_SINT16 BorderColor0; + struct GEN75_BORDER_COLOR_UINT8_SINT8 BorderColor1; +}; + /* Encode *values into the dword array at dst: dw[0] overlays the float red value with the four 8-bit fields, dw[1..3] carry the float green/blue/alpha values. */ +static inline void +GEN75_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->BorderColorRedDX100GL) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + /* dw[1..3] take the float members (the ...0 names); the same-named uint32_t members are the 8-bit values already packed into dw[0] and would be converted to float, not reinterpreted */ + dw[1] = + __gen_float(values->BorderColorGreen0) | + 0; + + dw[2] = + __gen_float(values->BorderColorBlue0) | + 0; + + dw[3] = + __gen_float(values->BorderColorAlpha0) | + 0; + + for (uint32_t
i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + 0; + } + + GEN75_BORDER_COLOR_UINT32_SINT32_pack(data, &dw[16], &values->BorderColor); +} + +#define GEN75_SAMPLER_STATE_length 0x00000004 + +struct GEN75_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define TRIQUAL_HIGHMAG_CLAMP_MIPFILTER 1 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool 
NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN75_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN75_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type 
*/ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define _3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function 
*/ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 
206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 
+#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 
+#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 + diff --git a/src/vulkan/gen7_pack.h b/src/vulkan/gen7_pack.h new file mode 100644 index 00000000000..2204263e1dd --- /dev/null +++ b/src/vulkan/gen7_pack.h @@ -0,0 +1,6971 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for IVB. 
+ * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +union __gen_value { + float f; + uint32_t dw; +}; + +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ul >> (64 - (end - start + 1))) << start; +} + +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ul << (end - start + 1)); +#endif + + return v << start; +} + +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ul >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_combine_address before including this file +#endif + +#endif + +#define GEN7_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_VS_length 0x00000002 + +struct GEN7_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 29) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 1 + +#define GEN7_MI_STORE_REGISTER_MEM_length 0x00000003 + +struct GEN7_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN7_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN7_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN7_PIPELINE_SELECT_length 0x00000001 + +struct GEN7_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN7_PIPELINE_SELECT_pack(__gen_user_data *data, void * 
restrict dst, + const struct GEN7_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN7_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN7_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 8 + +#define GEN7_STATE_BASE_ADDRESS_length 0x0000000a + +#define GEN7_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN7_MEMORY_OBJECT_CONTROL_STATE { + uint32_t GraphicsDataTypeGFDT; + uint32_t LLCCacheabilityControlLLCCC; + uint32_t L3CacheabilityControlL3CC; +}; + +static inline void +GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->GraphicsDataTypeGFDT, 2, 2) | + __gen_field(values->LLCCacheabilityControlLLCCC, 1, 1) | + __gen_field(values->L3CacheabilityControlL3CC, 0, 0) | + 0; + +} + +struct GEN7_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + uint32_t StatelessDataPortAccessForceWriteThru; + bool GeneralStateBaseAddressModifyEnable; + __gen_address_type SurfaceStateBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct 
GEN7_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + __gen_address_type GeneralStateAccessUpperBound; + bool GeneralStateAccessUpperBoundModifyEnable; + __gen_address_type DynamicStateAccessUpperBound; + bool DynamicStateAccessUpperBoundModifyEnable; + __gen_address_type IndirectObjectAccessUpperBound; + bool IndirectObjectAccessUpperBoundModifyEnable; + __gen_address_type InstructionAccessUpperBound; + bool InstructionAccessUpperBoundModifyEnable; +}; + +static inline void +GEN7_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 8, 11) | + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 4, 7) | + __gen_field(values->StatelessDataPortAccessForceWriteThru, 3, 3) | + 
__gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw2 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 8, 11) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceStateBaseAddress, dw2); + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw3 = + __gen_field(dw_DynamicStateMemoryObjectControlState, 8, 11) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->DynamicStateBaseAddress, dw3); + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 8, 11) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->IndirectObjectBaseAddress, dw4); + + uint32_t dw_InstructionMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw5 = + __gen_field(dw_InstructionMemoryObjectControlState, 8, 11) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->InstructionBaseAddress, dw5); + + uint32_t dw6 = + __gen_field(values->GeneralStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[6] 
= + __gen_combine_address(data, &dw[6], values->GeneralStateAccessUpperBound, dw6); + + uint32_t dw7 = + __gen_field(values->DynamicStateAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[7] = + __gen_combine_address(data, &dw[7], values->DynamicStateAccessUpperBound, dw7); + + uint32_t dw8 = + __gen_field(values->IndirectObjectAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[8] = + __gen_combine_address(data, &dw[8], values->IndirectObjectAccessUpperBound, dw8); + + uint32_t dw9 = + __gen_field(values->InstructionAccessUpperBoundModifyEnable, 0, 0) | + 0; + + dw[9] = + __gen_combine_address(data, &dw[9], values->InstructionAccessUpperBound, dw9); + +} + +#define GEN7_STATE_PREFETCH_length_bias 0x00000002 +#define GEN7_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN7_STATE_PREFETCH_length 0x00000002 + +struct GEN7_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN7_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN7_STATE_SIP_length_bias 0x00000002 +#define GEN7_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 0 + +#define 
GEN7_STATE_SIP_length 0x00000002 + +struct GEN7_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SystemInstructionPointer; +}; + +static inline void +GEN7_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SystemInstructionPointer, 4, 31) | + 0; + +} + +#define GEN7_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define GEN7_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN7_SWTESS_BASE_ADDRESS_length 0x00000002 + +struct GEN7_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN7_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + 
uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + +} + +#define GEN7_3DPRIMITIVE_length_bias 0x00000002 +#define GEN7_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN7_3DPRIMITIVE_length 0x00000007 + +struct GEN7_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN7_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 
0; + +} + +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN7_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AACoverageBias; + float AACoverageSlope; + float AACoverageEndCapBias; + float AACoverageEndCapSlope; +}; + +static inline void +GEN7_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, 
void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + 
._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; +}; + +static inline void +GEN7_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + 
__gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; +}; + +static inline void +GEN7_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN7_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN7_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + +static inline void +GEN7_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + +#define GEN7_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN7_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN7_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DepthClearValue; + bool DepthClearValueValid; +}; + +static inline void +GEN7_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DepthClearValue, 0, 31) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_CLIP_length 0x00000004 + +struct GEN7_3DSTATE_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t FrontWinding; + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; 
+#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define APIMODE_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool ViewportZClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define CLIPMODE_NORMAL 0 +#define CLIPMODE_REJECT_ALL 3 +#define CLIPMODE_ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 + uint32_t LineStripListProvokingVertexSelect; +#define Vertex0 0 +#define Vertex1 1 +#define Vertex2 2 + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + +static inline void +GEN7_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->FrontWinding, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->ViewportZClipTestEnable, 
27, 27) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_DS_length 0x00000007 + +#define GEN7_3DSTATE_CONSTANT_BODY_length 0x00000006 + +struct GEN7_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + 
__gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw_ConstantBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + uint32_t dw2 = + __gen_field(dw_ConstantBufferObjectControlState, 0, 4) | + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->PointerToConstantBuffer1, dw3); + + uint32_t dw4 = + 0; + + dw[4] = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer2, dw4); + + uint32_t dw5 = + 0; + + dw[5] = + __gen_combine_address(data, &dw[5], values->PointerToConstantBuffer3, dw5); + +} + +struct GEN7_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_GS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct 
GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_HS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_PS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + 
uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_CONSTANT_VS_length 0x00000007 + +struct GEN7_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_3DSTATE_CONSTANT_BODY ConstantBody; +}; + +static inline void +GEN7_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN7_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + +#define GEN7_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_DEPTH_BUFFER_length 0x00000007 + 
+struct GEN7_3DSTATE_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; +#define SURFTYPE_CUBEmustbezero 0 + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t DepthCoordinateOffsetY; + uint32_t DepthCoordinateOffsetX; + uint32_t RenderTargetViewExtent; +}; + +static inline void +GEN7_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[3] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, 
&dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[4] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 3) | + 0; + + dw[5] = + __gen_field(values->DepthCoordinateOffsetY, 16, 31) | + __gen_field(values->DepthCoordinateOffsetX, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + 0; + +} + +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length_bias 0x00000002 +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 37, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_length 0x00000002 + +struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDEPTH_STENCIL_STATE; +}; + +static inline void +GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDEPTH_STENCIL_STATE, 6, 31) | + __gen_mbo(0, 0) | + 0; + +} + +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN7_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN7_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + 
uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + +static inline void +GEN7_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 4 + +#define GEN7_3DSTATE_DS_length 0x00000006 + +struct GEN7_3DSTATE_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define 
Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool ComputeWCoordinateEnable; + bool DSCacheDisable; + bool DSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->DSCacheDisable, 1, 1) | + __gen_field(values->DSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_GS_length_bias 
0x00000002 +#define GEN7_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_GS_length 0x00000007 + +struct GEN7_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterforURBData; + uint32_t MaximumNumberofThreads; +#define GSCTL_CUT 0 +#define GSCTL_SID 1 + uint32_t ControlDataFormat; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamID; +#define SINGLE 0 +#define DUAL_INSTANCE 1 +#define DUAL_OBJECT 2 + uint32_t DispatchMode; + uint32_t GSStatisticsEnable; + uint32_t GSInvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; + bool ReorderEnable; + bool DiscardAdjacency; + bool GSEnable; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | 
+ __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterforURBData, 0, 3) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->ControlDataFormat, 24, 24) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamID, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->GSStatisticsEnable, 10, 10) | + __gen_field(values->GSInvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderEnable, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->GSEnable, 0, 0) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define 
GEN7_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000003 + +struct GEN7_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; +}; + +static inline void +GEN7_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + +} + +#define GEN7_3DSTATE_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 5 + +#define GEN7_3DSTATE_HS_length 0x00000007 + +struct GEN7_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define 
_1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t MaximumNumberofThreads; + bool Enable; + bool StatisticsEnable; + uint32_t InstanceCount; + uint32_t KernelStartPointer; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t SemaphoreHandle; +}; + +static inline void +GEN7_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->MaximumNumberofThreads, 0, 6) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + __gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + dw[3] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[4] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + 
__gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[6] = + __gen_offset(values->SemaphoreHandle, 0, 11) | + 0; + +} + +#define GEN7_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN7_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_INDEX_BUFFER_length 0x00000003 + +struct GEN7_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + bool CutIndexEnable; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + uint32_t DwordLength; + __gen_address_type BufferStartingAddress; + __gen_address_type BufferEndingAddress; +}; + +static inline void +GEN7_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_MemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_MemoryObjectControlState, 12, 15) | + __gen_field(values->CutIndexEnable, 10, 10) | + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->BufferEndingAddress, dw2); + +} + +#define 
GEN7_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN7_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN7_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + __gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN7_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN7_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN7_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + 
uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN7_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN7_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN7_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 2 + +#define GEN7_3DSTATE_MULTISAMPLE_length 0x00000004 + +struct GEN7_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define PIXLOC_CENTER 0 +#define PIXLOC_UL_CORNER 1 + uint32_t PixelLocation; +#define NUMSAMPLES_1 0 +#define NUMSAMPLES_4 2 +#define NUMSAMPLES_8 3 + uint32_t NumberofMultisamples; + float Sample3XOffset; + float Sample3YOffset; + float Sample2XOffset; + float Sample2YOffset; + float Sample1XOffset; + float Sample1YOffset; + float Sample0XOffset; + float Sample0YOffset; + float Sample7XOffset; + float Sample7YOffset; + float Sample6XOffset; + float Sample6YOffset; + float Sample5XOffset; + float Sample5YOffset; + float Sample4XOffset; + float Sample4YOffset; +}; + +static inline void +GEN7_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + + dw[2] = + __gen_field(values->Sample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[3] = + __gen_field(values->Sample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->Sample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->Sample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->Sample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->Sample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->Sample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->Sample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->Sample4YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_OFFSET * 
restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN7_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN7_3DSTATE_PS_length_bias 0x00000002 +#define GEN7_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 6 + +#define GEN7_3DSTATE_PS_length 0x00000008 + +struct GEN7_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + 
uint32_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlowSPF; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; + uint32_t SamplerCount; +#define FTZ 0 +#define RET 1 + uint32_t DenormalMode; + uint32_t BindingTableEntryCount; +#define IEEE745 0 +#define Alt 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + bool PushConstantEnable; + bool AttributeEnable; + bool oMaskPresenttoRenderTarget; + bool RenderTargetFastClearEnable; + bool DualSourceBlendEnable; + bool RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterforConstantSetupData0; + uint32_t DispatchGRFStartRegisterforConstantSetupData1; + uint32_t DispatchGRFStartRegisterforConstantSetupData2; + uint32_t KernelStartPointer1; + uint32_t KernelStartPointer2; +}; + +static inline void +GEN7_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer0, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleProgramFlowSPF, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->DenormalMode, 26, 26) | + 
__gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->AttributeEnable, 10, 10) | + __gen_field(values->oMaskPresenttoRenderTarget, 9, 9) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->DualSourceBlendEnable, 7, 7) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[5] = + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterforConstantSetupData2, 0, 6) | + 0; + + dw[6] = + __gen_offset(values->KernelStartPointer1, 6, 31) | + 0; + + dw[7] = + __gen_offset(values->KernelStartPointer2, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t 
ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define _0KB 0 + uint32_t ConstantBufferOffset; +#define _0KB 0 + uint32_t ConstantBufferSize; +}; + +static inline void +GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 19) | + __gen_field(values->ConstantBufferSize, 0, 4) | + 0; + +} + +#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define 
GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 19, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002
+/* Unpacked field values for 3DSTATE_PUSH_CONSTANT_ALLOC_HS (header fields followed by buffer offset and size). */
+struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define _0KB 0
+   uint32_t ConstantBufferOffset;
+#define _0KB 0
+   uint32_t ConstantBufferSize;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; ConstantBufferOffset (bits 16..19) and ConstantBufferSize (bits 0..4) in dw[1]. */
+static inline void
+GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst,
+                                         const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->ConstantBufferOffset, 16, 19) |
+      __gen_field(values->ConstantBufferSize, 0, 4) |
+      0;
+
+}
+/* 3DSTATE_PUSH_CONSTANT_ALLOC_PS: length bias, header initializer list (sub-opcode 22) and command length in dwords. */
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 22, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002
+/* Unpacked field values for 3DSTATE_PUSH_CONSTANT_ALLOC_PS. */
+struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define _0KB 0
+   uint32_t ConstantBufferOffset;
+#define _0KB 0
+   uint32_t ConstantBufferSize;
+};
+/* Packs *values into the command dwords at dst, starting with the header fields in dw[0]. */
+static inline void
+GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst,
+                                         const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31)
|
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->ConstantBufferOffset, 16, 19) |
+      __gen_field(values->ConstantBufferSize, 0, 4) |
+      0;
+
+}
+/* 3DSTATE_PUSH_CONSTANT_ALLOC_VS: length bias, header initializer list (sub-opcode 18) and command length in dwords. */
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 18, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002
+/* Unpacked field values for 3DSTATE_PUSH_CONSTANT_ALLOC_VS. */
+struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define _0KB 0
+   uint32_t ConstantBufferOffset;
+#define _0KB 0
+   uint32_t ConstantBufferSize;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; ConstantBufferOffset (bits 16..19) and ConstantBufferSize (bits 0..4) in dw[1]. */
+static inline void
+GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst,
+                                         const struct GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->ConstantBufferOffset, 16, 19) |
+      __gen_field(values->ConstantBufferSize, 0, 4) |
+      0;
+
+}
+/* 3DSTATE_SAMPLER_PALETTE_LOAD0: the header macro sets no DwordLength and _length is 0; the command is variable length. */
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 2
+
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000
+
+#define GEN7_PALETTE_ENTRY_length 0x00000001
+/* One palette entry: four colour channels that are packed into a single dword. */
+struct GEN7_PALETTE_ENTRY {
+   uint32_t Alpha;
+   uint32_t Red;
+   uint32_t Green;
+   uint32_t Blue;
+};
+
+static inline void
+GEN7_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst,
+                        const struct GEN7_PALETTE_ENTRY * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+   /* One dword, 8 bits per channel: Alpha 24..31, Red 16..23, Green 8..15, Blue 0..7. */
+   dw[0] =
+      __gen_field(values->Alpha, 24, 31) |
+      __gen_field(values->Red, 16, 23) |
+      __gen_field(values->Green, 8, 15) |
+      __gen_field(values->Blue, 0, 7) |
+      0;
+
+}
+/* Header fields of 3DSTATE_SAMPLER_PALETTE_LOAD0; the variable-length payload is not represented in the struct. */
+struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   /* variable length fields follow */
+};
+/* Packs only the header dword (dw[0]); nothing after it is written by this function. */
+static inline void
+GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst,
+                                        const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   /* variable length fields follow */
+}
+/* 3DSTATE_SAMPLER_PALETTE_LOAD1: same shape as LOAD0 with sub-opcode 12; also variable length (_length is 0). */
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 12
+
+#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000
+/* Header fields of 3DSTATE_SAMPLER_PALETTE_LOAD1. */
+struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   /* variable length fields follow */
+};
+/* Packs the header dword of 3DSTATE_SAMPLER_PALETTE_LOAD1 into dw[0]. */
+static inline void
+GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst,
+                                        const struct GEN7_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+
__gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   /* variable length fields follow */
+}
+/* 3DSTATE_SAMPLER_STATE_POINTERS_DS: length bias, header initializer list (opcode 0, sub-opcode 45) and command length in dwords. */
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 45, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002
+/* Unpacked field values: header fields plus the DS sampler-state pointer. */
+struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PointertoDSSamplerState;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; the pointer goes through __gen_offset(…, 5, 31) into dw[1]. */
+static inline void
+GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst,
+                                            const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->PointertoDSSamplerState, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SAMPLER_STATE_POINTERS_GS: same layout as the DS variant with sub-opcode 46. */
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 46, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002
+/* Unpacked field values: header fields plus the GS sampler-state pointer. */
+struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PointertoGSSamplerState;
+};
+/* Packs a GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS into the dwords at dst. */
+static inline void
+GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst,
+                                            const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values)
+{
+
uint32_t *dw = (uint32_t * restrict) dst;
+   /* dw[0]: command header fields. */
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+   /* dw[1]: GS sampler-state pointer via __gen_offset, bits 5..31. */
+   dw[1] =
+      __gen_offset(values->PointertoGSSamplerState, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SAMPLER_STATE_POINTERS_HS: length bias, header initializer list (opcode 0, sub-opcode 44) and command length in dwords. */
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 44, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002
+/* Unpacked field values: header fields plus the HS sampler-state pointer. */
+struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PointertoHSSamplerState;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; the pointer goes through __gen_offset(…, 5, 31) into dw[1]. */
+static inline void
+GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst,
+                                            const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->PointertoHSSamplerState, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SAMPLER_STATE_POINTERS_PS: same layout with sub-opcode 47. */
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 47, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002
+/* Unpacked field values for 3DSTATE_SAMPLER_STATE_POINTERS_PS. */
+struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PointertoPSSamplerState;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; the PS sampler-state pointer goes through __gen_offset(…, 5, 31) into dw[1]. */
+static inline void
+GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst,
+                                            const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->PointertoPSSamplerState, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SAMPLER_STATE_POINTERS_VS: length bias, header initializer list (opcode 0, sub-opcode 43) and command length in dwords. */
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 43, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002
+/* Unpacked field values: header fields plus the VS sampler-state pointer. */
+struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PointertoVSSamplerState;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; the pointer goes through __gen_offset(…, 5, 31) into dw[1]. */
+static inline void
+GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst,
+                                            const struct GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->PointertoVSSamplerState, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SAMPLE_MASK: length bias, header initializer list (opcode 0, sub-opcode 24) and command length in dwords. */
+#define GEN7_3DSTATE_SAMPLE_MASK_length_bias 0x00000002
+#define GEN7_3DSTATE_SAMPLE_MASK_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 24, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SAMPLE_MASK_length 0x00000002
+/* Unpacked field values for 3DSTATE_SAMPLE_MASK: header fields plus the sample mask. */
+struct GEN7_3DSTATE_SAMPLE_MASK {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t SampleMask;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; SampleMask in bits 0..7 of dw[1]. */
+static inline void
+GEN7_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst,
+                              const struct GEN7_3DSTATE_SAMPLE_MASK * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->SampleMask, 0, 7) |
+      0;
+
+}
+/* 3DSTATE_SBE: header initializer list (opcode 0, sub-opcode 31) with DwordLength 12; the declared command length is 0x0e (14) dwords. */
+#define GEN7_3DSTATE_SBE_length_bias 0x00000002
+#define GEN7_3DSTATE_SBE_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 31, \
+   .DwordLength = 12
+
+#define GEN7_3DSTATE_SBE_length 0x0000000e
+/* Unpacked field values for 3DSTATE_SBE. The #defines interleaved with the members name the values accepted by the member that follows them. */
+struct GEN7_3DSTATE_SBE {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define SWIZ_0_15 0
+#define SWIZ_16_31 1
+   uint32_t AttributeSwizzleControlMode;
+   uint32_t NumberofSFOutputAttributes;
+   bool AttributeSwizzleEnable;
+#define UPPERLEFT 0
+#define LOWERLEFT 1
+   uint32_t PointSpriteTextureCoordinateOrigin;
+   uint32_t VertexURBEntryReadLength;
+   uint32_t VertexURBEntryReadOffset;
+   bool Attribute2n1ComponentOverrideW;
+   bool Attribute2n1ComponentOverrideZ;
+   bool Attribute2n1ComponentOverrideY;
+   bool Attribute2n1ComponentOverrideX;
+#define CONST_0000 0
+#define CONST_0001_FLOAT 1
+#define CONST_1111_FLOAT 2
+#define PRIM_ID 3
+   uint32_t Attribute2n1ConstantSource;
+#define INPUTATTR 0
+#define INPUTATTR_FACING 1
+#define INPUTATTR_W 2
+#define INPUTATTR_FACING_W 3
+   uint32_t Attribute2n1SwizzleSelect;
+   uint32_t Attribute2n1SourceAttribute;
+   bool Attribute2nComponentOverrideW;
+   bool
Attribute2nComponentOverrideZ;
+   bool Attribute2nComponentOverrideY;
+   bool Attribute2nComponentOverrideX;
+#define CONST_0000 0
+#define CONST_0001_FLOAT 1
+#define CONST_1111_FLOAT 2
+#define PRIM_ID 3
+   uint32_t Attribute2nConstantSource;
+#define INPUTATTR 0
+#define INPUTATTR_FACING 1
+#define INPUTATTR_W 2
+#define INPUTATTR_FACING_W 3
+   uint32_t Attribute2nSwizzleSelect;
+   uint32_t Attribute2nSourceAttribute; /* last of the Attribute2n* members visible here */
+   uint32_t PointSpriteTextureCoordinateEnable;
+   uint32_t ConstantInterpolationEnable310;
+   uint32_t Attribute7WrapShortestEnables; /* per-attribute 4-bit fields; 7..0 share one dword, 15..8 the next */
+   uint32_t Attribute6WrapShortestEnables;
+   uint32_t Attribute5WrapShortestEnables;
+   uint32_t Attribute4WrapShortestEnables;
+   uint32_t Attribute3WrapShortestEnables;
+   uint32_t Attribute2WrapShortestEnables;
+   uint32_t Attribute1WrapShortestEnables;
+   uint32_t Attribute0WrapShortestEnables;
+   uint32_t Attribute15WrapShortestEnables;
+   uint32_t Attribute14WrapShortestEnables;
+   uint32_t Attribute13WrapShortestEnables;
+   uint32_t Attribute12WrapShortestEnables;
+   uint32_t Attribute11WrapShortestEnables;
+   uint32_t Attribute10WrapShortestEnables;
+   uint32_t Attribute9WrapShortestEnables;
+   uint32_t Attribute8WrapShortestEnables;
+};
+/* Packs a GEN7_3DSTATE_SBE into the command dwords at dst: header fields in dw[0], swizzle/URB-read control fields in dw[1], remaining fields in later dwords. */
+static inline void
+GEN7_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst,
+                      const struct GEN7_3DSTATE_SBE * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->AttributeSwizzleControlMode, 28, 28) |
+      __gen_field(values->NumberofSFOutputAttributes, 22, 27) |
+      __gen_field(values->AttributeSwizzleEnable, 21, 21) |
+      __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) |
+      __gen_field(values->VertexURBEntryReadLength, 11, 15) |
+      __gen_field(values->VertexURBEntryReadOffset,
4, 9) |
+      0;
+   /* dw[2]: one pair of attribute descriptors — Attribute2n1* in the upper 16 bits, Attribute2n* in the lower 16 bits. */
+   dw[2] =
+      __gen_field(values->Attribute2n1ComponentOverrideW, 31, 31) |
+      __gen_field(values->Attribute2n1ComponentOverrideZ, 30, 30) |
+      __gen_field(values->Attribute2n1ComponentOverrideY, 29, 29) |
+      __gen_field(values->Attribute2n1ComponentOverrideX, 28, 28) |
+      __gen_field(values->Attribute2n1ConstantSource, 25, 26) |
+      __gen_field(values->Attribute2n1SwizzleSelect, 22, 23) |
+      __gen_field(values->Attribute2n1SourceAttribute, 16, 20) |
+      __gen_field(values->Attribute2nComponentOverrideW, 15, 15) |
+      __gen_field(values->Attribute2nComponentOverrideZ, 14, 14) |
+      __gen_field(values->Attribute2nComponentOverrideY, 13, 13) |
+      __gen_field(values->Attribute2nComponentOverrideX, 12, 12) |
+      __gen_field(values->Attribute2nConstantSource, 9, 10) |
+      __gen_field(values->Attribute2nSwizzleSelect, 6, 7) |
+      __gen_field(values->Attribute2nSourceAttribute, 0, 4) |
+      0;
+   /* NOTE(review): the next store is dw[10]; no visible statement here assigns dw[3]..dw[9], so those dwords keep whatever dst already held. Presumably they carry further attribute pairs — confirm that callers fill them, or that the generator should emit them. */
+   dw[10] =
+      __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) |
+      0;
+
+   dw[11] =
+      __gen_field(values->ConstantInterpolationEnable310, 0, 31) |
+      0;
+   /* dw[12]/dw[13]: sixteen 4-bit WrapShortestEnables fields, attributes 7..0 then 15..8, highest attribute in the top nibble. */
+   dw[12] =
+      __gen_field(values->Attribute7WrapShortestEnables, 28, 31) |
+      __gen_field(values->Attribute6WrapShortestEnables, 24, 27) |
+      __gen_field(values->Attribute5WrapShortestEnables, 20, 23) |
+      __gen_field(values->Attribute4WrapShortestEnables, 16, 19) |
+      __gen_field(values->Attribute3WrapShortestEnables, 12, 15) |
+      __gen_field(values->Attribute2WrapShortestEnables, 8, 11) |
+      __gen_field(values->Attribute1WrapShortestEnables, 4, 7) |
+      __gen_field(values->Attribute0WrapShortestEnables, 0, 3) |
+      0;
+
+   dw[13] =
+      __gen_field(values->Attribute15WrapShortestEnables, 28, 31) |
+      __gen_field(values->Attribute14WrapShortestEnables, 24, 27) |
+      __gen_field(values->Attribute13WrapShortestEnables, 20, 23) |
+      __gen_field(values->Attribute12WrapShortestEnables, 16, 19) |
+      __gen_field(values->Attribute11WrapShortestEnables, 12, 15) |
+      __gen_field(values->Attribute10WrapShortestEnables, 8, 11) |
+
__gen_field(values->Attribute9WrapShortestEnables, 4, 7) |
+      __gen_field(values->Attribute8WrapShortestEnables, 0, 3) |
+      0;
+
+}
+/* 3DSTATE_SCISSOR_STATE_POINTERS: length bias, header initializer list (opcode 0, sub-opcode 15) and command length in dwords. */
+#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002
+#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 15, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002
+/* Unpacked field values: header fields plus the scissor-rect pointer. */
+struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t ScissorRectPointer;
+};
+/* Packs *values into two dwords at dst: header fields in dw[0]; ScissorRectPointer goes through __gen_offset(…, 5, 31) into dw[1]. */
+static inline void
+GEN7_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst,
+                                         const struct GEN7_3DSTATE_SCISSOR_STATE_POINTERS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->ScissorRectPointer, 5, 31) |
+      0;
+
+}
+/* 3DSTATE_SF: header initializer list (opcode 0, sub-opcode 19) with DwordLength 5; declared command length is 7 dwords. */
+#define GEN7_3DSTATE_SF_length_bias 0x00000002
+#define GEN7_3DSTATE_SF_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 19, \
+   .DwordLength = 5
+
+#define GEN7_3DSTATE_SF_length 0x00000007
+/* Unpacked field values for 3DSTATE_SF. The #defines interleaved with the members name values for the member that follows them. */
+struct GEN7_3DSTATE_SF {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define D32_FLOAT_S8X24_UINT 0
+#define D32_FLOAT 1
+#define D24_UNORM_S8_UINT 2
+#define D24_UNORM_X8_UINT 3
+#define D16_UNORM 5
+   uint32_t DepthBufferSurfaceFormat;
+   bool LegacyGlobalDepthBiasEnable;
+   bool StatisticsEnable;
+   bool GlobalDepthOffsetEnableSolid;
+   bool GlobalDepthOffsetEnableWireframe;
+   bool GlobalDepthOffsetEnablePoint;
+#define RASTER_SOLID
0
+#define RASTER_WIREFRAME 1
+#define RASTER_POINT 2
+   uint32_t FrontFaceFillMode;
+#define RASTER_SOLID 0
+#define RASTER_WIREFRAME 1
+#define RASTER_POINT 2
+   uint32_t BackFaceFillMode;
+   bool ViewTransformEnable;
+   uint32_t FrontWinding;
+   bool AntiAliasingEnable;
+#define CULLMODE_BOTH 0
+#define CULLMODE_NONE 1
+#define CULLMODE_FRONT 2
+#define CULLMODE_BACK 3
+   uint32_t CullMode;
+   float LineWidth; /* float here; the pack function scales it by (1 << 7) before storing */
+   uint32_t LineEndCapAntialiasingRegionWidth;
+   bool ScissorRectangleEnable;
+   uint32_t MultisampleRasterizationMode;
+   bool LastPixelEnable;
+#define Vertex0 0
+#define Vertex1 1
+#define Vertex2 2
+   uint32_t TriangleStripListProvokingVertexSelect;
+   uint32_t LineStripListProvokingVertexSelect;
+#define Vertex0 0
+#define Vertex1 1
+#define Vertex2 2
+   uint32_t TriangleFanProvokingVertexSelect;
+#define AALINEDISTANCE_TRUE 1
+   uint32_t AALineDistanceMode;
+   uint32_t VertexSubPixelPrecisionSelect;
+   uint32_t UsePointWidthState;
+   float PointWidth; /* float here; the pack function scales it by (1 << 3) before storing */
+   float GlobalDepthOffsetConstant;
+   float GlobalDepthOffsetScale;
+   float GlobalDepthOffsetClamp;
+};
+/* Packs a GEN7_3DSTATE_SF into the command dwords at dst: header fields in dw[0], depth-format/fill-mode/winding controls in dw[1], remaining fields in later dwords. */
+static inline void
+GEN7_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst,
+                     const struct GEN7_3DSTATE_SF * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->DepthBufferSurfaceFormat, 12, 14) |
+      __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) |
+      __gen_field(values->StatisticsEnable, 10, 10) |
+      __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) |
+      __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) |
+      __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) |
+      __gen_field(values->FrontFaceFillMode, 5, 6) |
+      __gen_field(values->BackFaceFillMode, 3, 4) |
+
__gen_field(values->ViewTransformEnable, 1, 1) |
+      __gen_field(values->FrontWinding, 0, 0) |
+      0;
+   /* LineWidth is a float multiplied by (1 << 7) before being placed in bits 18..27 — presumably a fixed-point encoding with 7 fractional bits; confirm against the hardware docs. */
+   dw[2] =
+      __gen_field(values->AntiAliasingEnable, 31, 31) |
+      __gen_field(values->CullMode, 29, 30) |
+      __gen_field(values->LineWidth * (1 << 7), 18, 27) |
+      __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) |
+      __gen_field(values->ScissorRectangleEnable, 11, 11) |
+      __gen_field(values->MultisampleRasterizationMode, 8, 9) |
+      0;
+   /* PointWidth is likewise scaled, by (1 << 3), into bits 0..10. */
+   dw[3] =
+      __gen_field(values->LastPixelEnable, 31, 31) |
+      __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) |
+      __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) |
+      __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) |
+      __gen_field(values->AALineDistanceMode, 14, 14) |
+      __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) |
+      __gen_field(values->UsePointWidthState, 11, 11) |
+      __gen_field(values->PointWidth * (1 << 3), 0, 10) |
+      0;
+   /* dw[4]..dw[6]: the three depth-offset floats, each passed through __gen_float into its own dword. */
+   dw[4] =
+      __gen_float(values->GlobalDepthOffsetConstant) |
+      0;
+
+   dw[5] =
+      __gen_float(values->GlobalDepthOffsetScale) |
+      0;
+
+   dw[6] =
+      __gen_float(values->GlobalDepthOffsetClamp) |
+      0;
+
+}
+/* 3DSTATE_SO_BUFFER: header initializer list (opcode 1, sub-opcode 24) with DwordLength 2; declared command length is 4 dwords. */
+#define GEN7_3DSTATE_SO_BUFFER_length_bias 0x00000002
+#define GEN7_3DSTATE_SO_BUFFER_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 24, \
+   .DwordLength = 2
+
+#define GEN7_3DSTATE_SO_BUFFER_length 0x00000004
+/* Unpacked field values for 3DSTATE_SO_BUFFER, including a nested memory-object-control struct and two address-typed members. */
+struct GEN7_3DSTATE_SO_BUFFER {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t SOBufferIndex;
+   struct GEN7_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState;
+   uint32_t SurfacePitch;
+   __gen_address_type SurfaceBaseAddress;
+   __gen_address_type SurfaceEndAddress;
+};
+/* Packs a GEN7_3DSTATE_SO_BUFFER into the command dwords at dst. */
+static inline void
+GEN7_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst,
+                            const struct GEN7_3DSTATE_SO_BUFFER * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+
dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+   /* The nested control-state struct is packed into a scratch dword first, then inserted into bits 25..28 of dw[1]. */
+   uint32_t dw_SOBufferObjectControlState;
+   GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState);
+   dw[1] =
+      __gen_field(values->SOBufferIndex, 29, 30) |
+      __gen_field(dw_SOBufferObjectControlState, 25, 28) |
+      __gen_field(values->SurfacePitch, 0, 11) |
+      0;
+   /* dw2/dw3 are zero: no extra bits are merged with the addresses, which are written via __gen_combine_address together with the location of the destination dword. */
+   uint32_t dw2 =
+      0;
+
+   dw[2] =
+      __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2);
+
+   uint32_t dw3 =
+      0;
+
+   dw[3] =
+      __gen_combine_address(data, &dw[3], values->SurfaceEndAddress, dw3);
+
+}
+/* 3DSTATE_SO_DECL_LIST: the header macro sets no DwordLength and _length is 0; the command is variable length. */
+#define GEN7_3DSTATE_SO_DECL_LIST_length_bias 0x00000002
+#define GEN7_3DSTATE_SO_DECL_LIST_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 1, \
+   ._3DCommandSubOpcode = 23
+
+#define GEN7_3DSTATE_SO_DECL_LIST_length 0x00000000
+
+#define GEN7_SO_DECL_ENTRY_length 0x00000002
+
+#define GEN7_SO_DECL_length 0x00000001
+/* One stream-output declaration; GEN7_SO_DECL_pack places its fields in the low 14 bits of a dword. */
+struct GEN7_SO_DECL {
+   uint32_t OutputBufferSlot;
+   uint32_t HoleFlag;
+   uint32_t RegisterIndex;
+   uint32_t ComponentMask;
+};
+/* Packs a GEN7_SO_DECL into dw[0]: OutputBufferSlot 12..13, HoleFlag 11, RegisterIndex 4..9, ComponentMask 0..3. */
+static inline void
+GEN7_SO_DECL_pack(__gen_user_data *data, void * restrict dst,
+                  const struct GEN7_SO_DECL * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->OutputBufferSlot, 12, 13) |
+      __gen_field(values->HoleFlag, 11, 11) |
+      __gen_field(values->RegisterIndex, 4, 9) |
+      __gen_field(values->ComponentMask, 0, 3) |
+      0;
+
+}
+/* One entry of four SO_DECLs, one per stream (3 down to 0). */
+struct GEN7_SO_DECL_ENTRY {
+   struct GEN7_SO_DECL Stream3Decl;
+   struct GEN7_SO_DECL Stream2Decl;
+   struct GEN7_SO_DECL Stream1Decl;
+   struct GEN7_SO_DECL Stream0Decl;
+};
+/* Packs a GEN7_SO_DECL_ENTRY into the dwords at dst. */
+static inline void
+GEN7_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst,
+                        const struct GEN7_SO_DECL_ENTRY * restrict values)
+{
+   uint32_t *dw =
(uint32_t * restrict) dst;
+   /* Each stream's declaration is packed into its own scratch dword, then the four are combined as 16-bit lanes of one 64-bit value. */
+   uint32_t dw_Stream3Decl;
+   GEN7_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl);
+   uint32_t dw_Stream2Decl;
+   GEN7_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl);
+   uint32_t dw_Stream1Decl;
+   GEN7_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl);
+   uint32_t dw_Stream0Decl;
+   GEN7_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl);
+   uint64_t qw0 =
+      __gen_field(dw_Stream3Decl, 48, 63) |
+      __gen_field(dw_Stream2Decl, 32, 47) |
+      __gen_field(dw_Stream1Decl, 16, 31) |
+      __gen_field(dw_Stream0Decl, 0, 15) |
+      0;
+   /* Low 32 bits (streams 0 and 1) go to dw[0], high 32 bits (streams 2 and 3) to dw[1]. */
+   dw[0] = qw0;
+   dw[1] = qw0 >> 32;
+
+}
+/* Fixed part of 3DSTATE_SO_DECL_LIST: header fields, per-stream buffer selects and per-stream entry counts. */
+struct GEN7_3DSTATE_SO_DECL_LIST {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t StreamtoBufferSelects3;
+   uint32_t StreamtoBufferSelects2;
+   uint32_t StreamtoBufferSelects1;
+   uint32_t StreamtoBufferSelects0;
+   uint32_t NumEntries3;
+   uint32_t NumEntries2;
+   uint32_t NumEntries1;
+   uint32_t NumEntries0;
+   /* variable length fields follow */
+};
+/* Packs the three fixed dwords of 3DSTATE_SO_DECL_LIST. Note that DwordLength occupies bits 0..8 in dw[0] of this command. */
+static inline void
+GEN7_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst,
+                               const struct GEN7_3DSTATE_SO_DECL_LIST * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 8) |
+      0;
+
+   dw[1] =
+      __gen_field(values->StreamtoBufferSelects3, 12, 15) |
+      __gen_field(values->StreamtoBufferSelects2, 8, 11) |
+      __gen_field(values->StreamtoBufferSelects1, 4, 7) |
+      __gen_field(values->StreamtoBufferSelects0, 0, 3) |
+      0;
+
+   dw[2] =
+      __gen_field(values->NumEntries3, 24, 31) |
+      __gen_field(values->NumEntries2, 16, 23) |
+      __gen_field(values->NumEntries1, 8, 15) |
+      __gen_field(values->NumEntries0, 0, 7) |
+      0;
+
+   /* variable length fields
follow */
+}
+/* 3DSTATE_STENCIL_BUFFER: header initializer list (opcode 0, sub-opcode 6) with DwordLength 1; declared command length is 3 dwords. */
+#define GEN7_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002
+#define GEN7_3DSTATE_STENCIL_BUFFER_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 6, \
+   .DwordLength = 1
+
+#define GEN7_3DSTATE_STENCIL_BUFFER_length 0x00000003
+/* Unpacked field values: header fields, nested memory-object-control struct, pitch, and an address-typed base address. */
+struct GEN7_3DSTATE_STENCIL_BUFFER {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   struct GEN7_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState;
+   uint32_t SurfacePitch;
+   __gen_address_type SurfaceBaseAddress;
+};
+/* Packs *values into three dwords at dst: header fields, control-state + pitch, then the base address. */
+static inline void
+GEN7_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst,
+                                 const struct GEN7_3DSTATE_STENCIL_BUFFER * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+   /* The nested control-state struct is packed into a scratch dword, then inserted into bits 25..28 of dw[1]. */
+   uint32_t dw_StencilBufferObjectControlState;
+   GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState);
+   dw[1] =
+      __gen_field(dw_StencilBufferObjectControlState, 25, 28) |
+      __gen_field(values->SurfacePitch, 0, 16) |
+      0;
+   /* dw2 is zero: no extra bits are merged with the address written via __gen_combine_address. */
+   uint32_t dw2 =
+      0;
+
+   dw[2] =
+      __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2);
+
+}
+/* 3DSTATE_STREAMOUT: header initializer list (opcode 0, sub-opcode 30) with DwordLength 1; declared command length is 3 dwords. */
+#define GEN7_3DSTATE_STREAMOUT_length_bias 0x00000002
+#define GEN7_3DSTATE_STREAMOUT_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 30, \
+   .DwordLength = 1
+
+#define GEN7_3DSTATE_STREAMOUT_length 0x00000003
+/* Unpacked field values for 3DSTATE_STREAMOUT. */
+struct GEN7_3DSTATE_STREAMOUT {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t SOFunctionEnable;
+   uint32_t RenderingDisable;
+
uint32_t RenderStreamSelect;
+#define LEADING 0
+#define TRAILING 1
+   uint32_t ReorderMode;
+   bool SOStatisticsEnable;
+   uint32_t SOBufferEnable3;
+   uint32_t SOBufferEnable2;
+   uint32_t SOBufferEnable1;
+   uint32_t SOBufferEnable0;
+   uint32_t Stream3VertexReadOffset;
+   uint32_t Stream3VertexReadLength;
+   uint32_t Stream2VertexReadOffset;
+   uint32_t Stream2VertexReadLength;
+   uint32_t Stream1VertexReadOffset;
+   uint32_t Stream1VertexReadLength;
+   uint32_t Stream0VertexReadOffset;
+   uint32_t Stream0VertexReadLength;
+};
+/* Packs *values into three dwords at dst: header fields, enable/select bits, then per-stream vertex read offset/length. */
+static inline void
+GEN7_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst,
+                            const struct GEN7_3DSTATE_STREAMOUT * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+   /* dw[1]: function/rendering/statistics controls in the top bits, one enable bit per SO buffer in bits 8..11. */
+   dw[1] =
+      __gen_field(values->SOFunctionEnable, 31, 31) |
+      __gen_field(values->RenderingDisable, 30, 30) |
+      __gen_field(values->RenderStreamSelect, 27, 28) |
+      __gen_field(values->ReorderMode, 26, 26) |
+      __gen_field(values->SOStatisticsEnable, 25, 25) |
+      __gen_field(values->SOBufferEnable3, 11, 11) |
+      __gen_field(values->SOBufferEnable2, 10, 10) |
+      __gen_field(values->SOBufferEnable1, 9, 9) |
+      __gen_field(values->SOBufferEnable0, 8, 8) |
+      0;
+   /* dw[2]: one byte per stream (3 in the top byte, 0 in the bottom) — a 1-bit read offset above a 5-bit read length. */
+   dw[2] =
+      __gen_field(values->Stream3VertexReadOffset, 29, 29) |
+      __gen_field(values->Stream3VertexReadLength, 24, 28) |
+      __gen_field(values->Stream2VertexReadOffset, 21, 21) |
+      __gen_field(values->Stream2VertexReadLength, 16, 20) |
+      __gen_field(values->Stream1VertexReadOffset, 13, 13) |
+      __gen_field(values->Stream1VertexReadLength, 8, 12) |
+      __gen_field(values->Stream0VertexReadOffset, 5, 5) |
+      __gen_field(values->Stream0VertexReadLength, 0, 4) |
+      0;
+
+}
+/* 3DSTATE_TE: length bias and the start of the header initializer list. */
+#define GEN7_3DSTATE_TE_length_bias 0x00000002
+#define GEN7_3DSTATE_TE_header \
+
.CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 28, \
+   .DwordLength = 2
+
+#define GEN7_3DSTATE_TE_length 0x00000004
+/* Unpacked field values for 3DSTATE_TE. The #defines interleaved with the members name values for the member that follows them. */
+struct GEN7_3DSTATE_TE {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define INTEGER 0
+#define ODD_FRACTIONAL 1
+#define EVEN_FRACTIONAL 2
+   uint32_t Partitioning;
+#define POINT 0
+#define OUTPUT_LINE 1
+#define OUTPUT_TRI_CW 2
+#define OUTPUT_TRI_CCW 3
+   uint32_t OutputTopology;
+#define QUAD 0
+#define TRI 1
+#define ISOLINE 2
+   uint32_t TEDomain;
+#define HW_TESS 0
+#define SW_TESS 1
+   uint32_t TEMode;
+   bool TEEnable;
+   float MaximumTessellationFactorOdd;
+   float MaximumTessellationFactorNotOdd;
+};
+/* Packs *values into four dwords at dst: header fields, tessellation controls, then the two maximum-factor floats via __gen_float. */
+static inline void
+GEN7_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst,
+                     const struct GEN7_3DSTATE_TE * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->Partitioning, 12, 13) |
+      __gen_field(values->OutputTopology, 8, 9) |
+      __gen_field(values->TEDomain, 4, 5) |
+      __gen_field(values->TEMode, 1, 2) |
+      __gen_field(values->TEEnable, 0, 0) |
+      0;
+
+   dw[2] =
+      __gen_float(values->MaximumTessellationFactorOdd) |
+      0;
+
+   dw[3] =
+      __gen_float(values->MaximumTessellationFactorNotOdd) |
+      0;
+
+}
+/* 3DSTATE_URB_DS: length bias, header initializer list (opcode 0, sub-opcode 50) and command length in dwords. */
+#define GEN7_3DSTATE_URB_DS_length_bias 0x00000002
+#define GEN7_3DSTATE_URB_DS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 50, \
+   .DwordLength = 0
+
+#define GEN7_3DSTATE_URB_DS_length 0x00000002
+/* Unpacked field values for 3DSTATE_URB_DS. */
+struct GEN7_3DSTATE_URB_DS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t
_3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t DSURBStartingAddress; + uint32_t DSURBEntryAllocationSize; + uint32_t DSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DSURBStartingAddress, 25, 29) | + __gen_field(values->DSURBEntryAllocationSize, 16, 24) | + __gen_field(values->DSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_URB_GS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 51, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_GS_length 0x00000002 + +struct GEN7_3DSTATE_URB_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t GSURBStartingAddress; + uint32_t GSURBEntryAllocationSize; + uint32_t GSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->GSURBStartingAddress, 25, 29) | + __gen_field(values->GSURBEntryAllocationSize, 16, 24) | + __gen_field(values->GSNumberofURBEntries, 0, 15) | + 0; + +} + +#define 
GEN7_3DSTATE_URB_HS_length_bias 0x00000002 +#define GEN7_3DSTATE_URB_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 49, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_URB_HS_length 0x00000002 + +struct GEN7_3DSTATE_URB_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t HSURBStartingAddress; + uint32_t HSURBEntryAllocationSize; + uint32_t HSNumberofURBEntries; +}; + +static inline void +GEN7_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_URB_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->HSURBStartingAddress, 25, 29) | + __gen_field(values->HSURBEntryAllocationSize, 16, 24) | + __gen_field(values->HSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN7_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_BUFFERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 8 + +#define GEN7_3DSTATE_VERTEX_BUFFERS_length 0x00000000 + +#define GEN7_VERTEX_BUFFER_STATE_length 0x00000004 + +struct GEN7_VERTEX_BUFFER_STATE { + uint32_t VertexBufferIndex; +#define VERTEXDATA 0 +#define INSTANCEDATA 1 + uint32_t BufferAccessType; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE VertexBufferMemoryObjectControlState; + uint32_t AddressModifyEnable; + bool NullVertexBuffer; + uint32_t VertexFetchInvalidate; + uint32_t BufferPitch; + __gen_address_type BufferStartingAddress; + __gen_address_type EndAddress; + uint32_t InstanceDataStepRate; +}; + +static inline void 
+GEN7_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_BUFFER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_VertexBufferMemoryObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_VertexBufferMemoryObjectControlState, &values->VertexBufferMemoryObjectControlState); + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->BufferAccessType, 20, 20) | + __gen_field(dw_VertexBufferMemoryObjectControlState, 16, 19) | + __gen_field(values->AddressModifyEnable, 14, 14) | + __gen_field(values->NullVertexBuffer, 13, 13) | + __gen_field(values->VertexFetchInvalidate, 12, 12) | + __gen_field(values->BufferPitch, 0, 11) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->EndAddress, dw2); + + dw[3] = + __gen_field(values->InstanceDataStepRate, 0, 31) | + 0; + +} + +struct GEN7_3DSTATE_VERTEX_BUFFERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_BUFFERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002 +#define GEN7_3DSTATE_VERTEX_ELEMENTS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 9 + +#define 
GEN7_3DSTATE_VERTEX_ELEMENTS_length 0x00000000 + +#define GEN7_VERTEX_ELEMENT_STATE_length 0x00000002 + +struct GEN7_VERTEX_ELEMENT_STATE { + uint32_t VertexBufferIndex; + bool Valid; + uint32_t SourceElementFormat; + bool EdgeFlagEnable; + uint32_t SourceElementOffset; + uint32_t Component0Control; + uint32_t Component1Control; + uint32_t Component2Control; + uint32_t Component3Control; +}; + +static inline void +GEN7_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_VERTEX_ELEMENT_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->VertexBufferIndex, 26, 31) | + __gen_field(values->Valid, 25, 25) | + __gen_field(values->SourceElementFormat, 16, 24) | + __gen_field(values->EdgeFlagEnable, 15, 15) | + __gen_field(values->SourceElementOffset, 0, 11) | + 0; + + dw[1] = + __gen_field(values->Component0Control, 28, 30) | + __gen_field(values->Component1Control, 24, 26) | + __gen_field(values->Component2Control, 20, 22) | + __gen_field(values->Component3Control, 16, 18) | + 0; + +} + +struct GEN7_3DSTATE_VERTEX_ELEMENTS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN7_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VERTEX_ELEMENTS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_3DSTATE_VF_STATISTICS_length_bias 0x00000001 +#define GEN7_3DSTATE_VF_STATISTICS_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 0, \ + 
._3DCommandSubOpcode = 11 + +#define GEN7_3DSTATE_VF_STATISTICS_length 0x00000001 + +struct GEN7_3DSTATE_VF_STATISTICS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool StatisticsEnable; +}; + +static inline void +GEN7_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VF_STATISTICS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->StatisticsEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 35, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002 + +struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t CCViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->CCViewportPointer, 5, 31) | + 0; + +} + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002 +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\ + .CommandType 
= 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 33, \ + .DwordLength = 0 + +#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002 + +struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SFClipViewportPointer; +}; + +static inline void +GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SFClipViewportPointer, 6, 31) | + 0; + +} + +#define GEN7_3DSTATE_VS_length_bias 0x00000002 +#define GEN7_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 4 + +#define GEN7_3DSTATE_VS_length 0x00000006 + +struct GEN7_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnableVME; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ScratchSpaceBaseOffset; + uint32_t PerThreadScratchSpace; + uint32_t 
DispatchGRFStartRegisterforURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool VertexCacheDisable; + bool VSFunctionEnable; +}; + +static inline void +GEN7_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[2] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnableVME, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->ScratchSpaceBaseOffset, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = + __gen_field(values->DispatchGRFStartRegisterforURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[5] = + __gen_field(values->MaximumNumberofThreads, 25, 31) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->VSFunctionEnable, 0, 0) | + 0; + +} + +#define GEN7_3DSTATE_WM_length_bias 0x00000002 +#define GEN7_3DSTATE_WM_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 1 + +#define GEN7_3DSTATE_WM_length 0x00000003 + +struct GEN7_3DSTATE_WM { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool StatisticsEnable; + bool DepthBufferClear; + bool ThreadDispatchEnable; + bool DepthBufferResolveEnable; + bool HierarchicalDepthBufferResolveEnable; + bool LegacyDiamondLineRasterization; + bool PixelShaderKillPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; +#define EDSC_NORMAL 0 +#define EDSC_PSEXEC 1 +#define EDSC_PREPS 2 + uint32_t EarlyDepthStencilControl; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; +#define INTERP_PIXEL 0 +#define INTERP_CENTROID 2 +#define INTERP_SAMPLE 3 + uint32_t PositionZWInterpolationMode; + uint32_t BarycentricInterpolationMode; + bool PixelShaderUsesInputCoverageMask; + uint32_t LineEndCapAntialiasingRegionWidth; + uint32_t LineAntialiasingRegionWidth; + bool PolygonStippleEnable; + bool LineStippleEnable; +#define RASTRULE_UPPER_LEFT 0 +#define RASTRULE_UPPER_RIGHT 1 + uint32_t PointRasterizationRule; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t MultisampleRasterizationMode; +#define MSDISPMODE_PERSAMPLE 0 +#define MSDISPMODE_PERPIXEL 1 + uint32_t MultisampleDispatchMode; +}; + +static inline void +GEN7_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_3DSTATE_WM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->StatisticsEnable, 31, 31) | + __gen_field(values->DepthBufferClear, 30, 30) | + __gen_field(values->ThreadDispatchEnable, 29, 29) | + 
__gen_field(values->DepthBufferResolveEnable, 28, 28) | + __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) | + __gen_field(values->LegacyDiamondLineRasterization, 26, 26) | + __gen_field(values->PixelShaderKillPixel, 25, 25) | + __gen_field(values->PixelShaderComputedDepthMode, 23, 24) | + __gen_field(values->EarlyDepthStencilControl, 21, 22) | + __gen_field(values->PixelShaderUsesSourceDepth, 20, 20) | + __gen_field(values->PixelShaderUsesSourceW, 19, 19) | + __gen_field(values->PositionZWInterpolationMode, 17, 18) | + __gen_field(values->BarycentricInterpolationMode, 11, 16) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 10, 10) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) | + __gen_field(values->LineAntialiasingRegionWidth, 6, 7) | + __gen_field(values->PolygonStippleEnable, 4, 4) | + __gen_field(values->LineStippleEnable, 3, 3) | + __gen_field(values->PointRasterizationRule, 2, 2) | + __gen_field(values->MultisampleRasterizationMode, 0, 1) | + 0; + + dw[2] = + __gen_field(values->MultisampleDispatchMode, 31, 31) | + 0; + +} + +#define GEN7_GPGPU_OBJECT_length_bias 0x00000002 +#define GEN7_GPGPU_OBJECT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 4, \ + .DwordLength = 6 + +#define GEN7_GPGPU_OBJECT_length 0x00000008 + +struct GEN7_GPGPU_OBJECT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t SharedLocalMemoryFixedOffset; + uint32_t InterfaceDescriptorOffset; + uint32_t SharedLocalMemoryOffset; + uint32_t EndofThreadGroup; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define EitherHalfSlice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t ThreadGroupIDX; + uint32_t ThreadGroupIDY; + uint32_t ThreadGroupIDZ; + uint32_t ExecutionMask; +}; + +static inline void 
+GEN7_GPGPU_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_GPGPU_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SharedLocalMemoryFixedOffset, 7, 7) | + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SharedLocalMemoryOffset, 28, 31) | + __gen_field(values->EndofThreadGroup, 24, 24) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDX, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDZ, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_GPGPU_WALKER_length_bias 0x00000002 +#define GEN7_GPGPU_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcodeA = 5, \ + .DwordLength = 9 + +#define GEN7_GPGPU_WALKER_length 0x0000000b + +struct GEN7_GPGPU_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcodeA; + bool IndirectParameterEnable; + bool PredicateEnable; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; +#define SIMD8 0 +#define SIMD16 1 +#define SIMD32 2 + uint32_t SIMDSize; + uint32_t ThreadDepthCounterMaximum; + uint32_t ThreadHeightCounterMaximum; + uint32_t ThreadWidthCounterMaximum; + uint32_t ThreadGroupIDStartingX; + uint32_t ThreadGroupIDXDimension; + uint32_t ThreadGroupIDStartingY; + uint32_t ThreadGroupIDYDimension; + uint32_t 
ThreadGroupIDStartingZ; + uint32_t ThreadGroupIDZDimension; + uint32_t RightExecutionMask; + uint32_t BottomExecutionMask; +}; + +static inline void +GEN7_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_GPGPU_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcodeA, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->SIMDSize, 30, 31) | + __gen_field(values->ThreadDepthCounterMaximum, 16, 21) | + __gen_field(values->ThreadHeightCounterMaximum, 8, 13) | + __gen_field(values->ThreadWidthCounterMaximum, 0, 5) | + 0; + + dw[3] = + __gen_field(values->ThreadGroupIDStartingX, 0, 31) | + 0; + + dw[4] = + __gen_field(values->ThreadGroupIDXDimension, 0, 31) | + 0; + + dw[5] = + __gen_field(values->ThreadGroupIDStartingY, 0, 31) | + 0; + + dw[6] = + __gen_field(values->ThreadGroupIDYDimension, 0, 31) | + 0; + + dw[7] = + __gen_field(values->ThreadGroupIDStartingZ, 0, 31) | + 0; + + dw[8] = + __gen_field(values->ThreadGroupIDZDimension, 0, 31) | + 0; + + dw[9] = + __gen_field(values->RightExecutionMask, 0, 31) | + 0; + + dw[10] = + __gen_field(values->BottomExecutionMask, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_CURBE_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_CURBE_LOAD_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 1, \ + .DwordLength = 2 + +#define GEN7_MEDIA_CURBE_LOAD_length 0x00000004 + +struct GEN7_MEDIA_CURBE_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t CURBETotalDataLength; + 
uint32_t CURBEDataStartAddress; +}; + +static inline void +GEN7_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_CURBE_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->CURBETotalDataLength, 0, 16) | + 0; + + dw[3] = + __gen_field(values->CURBEDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002 +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 2, \ + .DwordLength = 2 + +#define GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004 + +struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN7_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 
2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN7_MEDIA_OBJECT_length 0x00000000 + +struct GEN7_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define HalfSlice1 2 +#define HalfSlice0 1 +#define Eitherhalfslice 0 + uint32_t HalfSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->HalfSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoredboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + 
__gen_field(values->ScoreboardMask, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN7_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN7_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN7_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN7_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN7_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN7_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN7_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define 
Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + bool ScoreboardMask; + bool DualMode; + bool Repel; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalEndY; + uint32_t LocalEndX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN7_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 4) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + 
__gen_field(values->DualMode, 31, 31) | + __gen_field(values->Repel, 30, 30) | + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + __gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + __gen_field(values->LocalEndY, 16, 24) | + __gen_field(values->LocalEndX, 0, 8) | + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN7_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN7_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN7_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN7_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static 
inline void +GEN7_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->WatermarkRequired, 6, 6) | + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN7_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN7_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 6 + +#define GEN7_MEDIA_VFE_STATE_length 0x00000008 + +struct GEN7_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; +#define NoMMIOreadwriteallowed 0 +#define MMIOreadwritetoanyaddress 2 + uint32_t GatewayMMIOAccessControl; + uint32_t GPGPUMode; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t 
Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN7_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + __gen_field(values->GatewayMMIOAccessControl, 3, 4) | + __gen_field(values->GPGPUMode, 2, 2) | + 0; + + dw[3] = + 0; + + dw[4] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[5] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[7] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + 
__gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN7_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN7_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define GEN7_MI_ARB_CHECK_length 0x00000001 + +struct GEN7_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_ARB_ON_OFF_length_bias 0x00000001 +#define GEN7_MI_ARB_ON_OFF_header \ + .CommandType = 0, \ + .MICommandOpcode = 8 + +#define GEN7_MI_ARB_ON_OFF_length 0x00000001 + +struct GEN7_MI_ARB_ON_OFF { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ArbitrationEnable; +}; + +static inline void +GEN7_MI_ARB_ON_OFF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_ARB_ON_OFF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ArbitrationEnable, 0, 0) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN7_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN7_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN7_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN7_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN7_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 0 + +#define GEN7_MI_BATCH_BUFFER_START_length 0x00000002 + +struct GEN7_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool ClearCommandBufferEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN7_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ClearCommandBufferEnable, 11, 11) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + +} + +#define GEN7_MI_CLFLUSH_length_bias 0x00000002 +#define GEN7_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN7_MI_CLFLUSH_length 0x00000000 + +struct GEN7_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + __gen_address_type PageBaseAddressHigh; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t 
* restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->PageBaseAddressHigh, dw2); + + /* variable length fields follow */ +} + +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 0 + +#define GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000002 + +struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN7_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + +} + +#define GEN7_MI_FLUSH_length_bias 0x00000001 +#define GEN7_MI_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 4 + +#define GEN7_MI_FLUSH_length 0x00000001 + +struct GEN7_MI_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool 
IndirectStatePointersDisable; + bool GenericMediaStateClear; +#define DontReset 0 +#define Reset 1 + bool GlobalSnapshotCountReset; +#define Flush 0 +#define DontFlush 1 + bool RenderCacheFlushInhibit; +#define DontInvalidate 0 +#define Invalidate 1 + bool StateInstructionCacheInvalidate; +}; + +static inline void +GEN7_MI_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IndirectStatePointersDisable, 5, 5) | + __gen_field(values->GenericMediaStateClear, 4, 4) | + __gen_field(values->GlobalSnapshotCountReset, 3, 3) | + __gen_field(values->RenderCacheFlushInhibit, 2, 2) | + __gen_field(values->StateInstructionCacheInvalidate, 1, 1) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN7_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN7_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN7_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN7_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN7_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + 
.DwordLength = 1 + +#define GEN7_MI_LOAD_REGISTER_MEM_length 0x00000003 + +struct GEN7_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN7_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + +} + +#define GEN7_MI_NOOP_length_bias 0x00000001 +#define GEN7_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN7_MI_NOOP_length 0x00000001 + +struct GEN7_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN7_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN7_MI_PREDICATE_length_bias 0x00000001 +#define GEN7_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN7_MI_PREDICATE_length 0x00000001 + +struct GEN7_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define 
LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN7_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN7_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN7_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN7_MI_REPORT_HEAD_length 0x00000001 + +struct GEN7_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_SEMAPHORE_MBOX_length_bias 0x00000002 +#define GEN7_MI_SEMAPHORE_MBOX_header \ + .CommandType = 0, \ + .MICommandOpcode = 22, \ + .DwordLength = 1 + +#define GEN7_MI_SEMAPHORE_MBOX_length 0x00000003 + +struct GEN7_MI_SEMAPHORE_MBOX { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RVSYNC 0 +#define RBSYNC 2 +#define UseGeneralRegisterSelect 3 + uint32_t RegisterSelect; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; +}; + +static inline void +GEN7_MI_SEMAPHORE_MBOX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SEMAPHORE_MBOX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 
31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->RegisterSelect, 16, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + dw[2] = + 0; + +} + +#define GEN7_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN7_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN7_MI_SET_CONTEXT_length 0x00000002 + +struct GEN7_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool ExtendedStateSaveEnable; + bool ExtendedStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN7_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SET_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->ExtendedStateSaveEnable, 3, 3) | + __gen_field(values->ExtendedStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN7_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN7_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN7_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t DwordLength; + uint32_t Address; + uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN7_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * 
restrict dst, + const struct GEN7_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = /* no fields are packed into this dword; it is always written as 0 */ + 0; + + dw[2] = + __gen_field(values->Address, 2, 31) | + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = /* NOTE(review): fifth dword stored, but GEN7_MI_STORE_DATA_IMM_length is 0x00000004 - confirm callers reserve room for dw[4] */ + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN7_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN7_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN7_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void /* Packs *values into dst as 32-bit dwords dw[0]..dw[3]; the data parameter is not used in this body. */ +GEN7_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = /* command header: type, opcode and dword length */ + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = /* NOTE(review): fourth dword stored, but GEN7_MI_STORE_DATA_INDEX_length is 0x00000003 - confirm callers reserve room for dw[3] */ + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN7_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN7_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN7_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN7_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN7_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * 
restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN7_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN7_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN7_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN7_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN7_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN7_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN7_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN7_MI_UPDATE_GTT_length 0x00000000 + +struct GEN7_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN7_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN7_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN7_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, 
\ + .DwordLength = 0 + +#define GEN7_MI_URB_CLEAR_length 0x00000002 + +struct GEN7_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN7_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 28) | + __gen_offset(values->URBAddress, 0, 13) | + 0; + +} + +#define GEN7_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN7_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN7_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN7_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN7_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN7_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN7_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN7_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN7_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCHorizontalBlankWaitEnable; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; +#define Notenabled 0 + uint32_t ConditionCodeWaitSelect; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBHorizontalBlankWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool 
DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAHorizontalBlankWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN7_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCHorizontalBlankWaitEnable, 22, 22) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->ConditionCodeWaitSelect, 16, 19) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBHorizontalBlankWaitEnable, 13, 13) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAHorizontalBlankWaitEnable, 5, 5) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN7_PIPE_CONTROL_length_bias 0x00000002 +#define GEN7_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 3 + +#define GEN7_PIPE_CONTROL_length 0x00000005 + +struct GEN7_PIPE_CONTROL { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint32_t ImmediateData; + uint32_t ImmediateData0; +}; + +/* Packs *values into dst as five 32-bit dwords, dw[0]..dw[4]: dw[0] is the command header, dw[1] the control bits, dw[2] the Address (resolved through __gen_combine_address using data), dw[3] ImmediateData and dw[4] ImmediateData0. */ +static inline void +GEN7_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + 
__gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + __gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = /* no extra bits are merged into the address dword */ + 0; + + dw[2] = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[3] = + __gen_field(values->ImmediateData, 0, 31) | + 0; + + dw[4] = /* second immediate dword: previously repeated ImmediateData, which left the ImmediateData0 member unencoded */ + __gen_field(values->ImmediateData0, 0, 31) | + 0; + +} + +#define GEN7_SCISSOR_RECT_length 0x00000002 + +struct GEN7_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN7_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN7_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN7_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float 
ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; +}; + +static inline void +GEN7_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + + dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + for (uint32_t i = 0, j = 12; i < 4; i += 1, j++) { + dw[j] = + 0; + } + +} + +#define GEN7_BLEND_STATE_length 0x00000002 + +struct GEN7_BLEND_STATE { + bool ColorBufferBlendEnable; + bool IndependentAlphaBlendEnable; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t AlphaBlendFunction; +#define BLENDFACTOR_ONE 1 +#define BLENDFACTOR_SRC_COLOR 2 +#define BLENDFACTOR_SRC_ALPHA 3 +#define BLENDFACTOR_DST_ALPHA 4 +#define BLENDFACTOR_DST_COLOR 5 +#define BLENDFACTOR_SRC_ALPHA_SATURATE 6 +#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define 
BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + uint32_t ColorBlendFunction; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaToCoverageEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; + bool LogicOpEnable; +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + uint32_t LogicOpFunction; + bool AlphaTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; +}; + +static inline void +GEN7_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + 
__gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaBlendFunction, 26, 28) | + __gen_field(values->SourceAlphaBlendFactor, 20, 24) | + __gen_field(values->DestinationAlphaBlendFactor, 15, 19) | + __gen_field(values->ColorBlendFunction, 11, 13) | + __gen_field(values->SourceBlendFactor, 5, 9) | + __gen_field(values->DestinationBlendFactor, 0, 4) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->AlphaToOneEnable, 30, 30) | + __gen_field(values->AlphaToCoverageDitherEnable, 29, 29) | + __gen_field(values->WriteDisableAlpha, 27, 27) | + __gen_field(values->WriteDisableRed, 26, 26) | + __gen_field(values->WriteDisableGreen, 25, 25) | + __gen_field(values->WriteDisableBlue, 24, 24) | + __gen_field(values->LogicOpEnable, 22, 22) | + __gen_field(values->LogicOpFunction, 18, 21) | + __gen_field(values->AlphaTestEnable, 16, 16) | + __gen_field(values->AlphaTestFunction, 13, 15) | + __gen_field(values->ColorDitherEnable, 12, 12) | + __gen_field(values->XDitherOffset, 10, 11) | + __gen_field(values->YDitherOffset, 8, 9) | + __gen_field(values->ColorClampRange, 2, 3) | + __gen_field(values->PreBlendColorClampEnable, 1, 1) | + __gen_field(values->PostBlendColorClampEnable, 0, 0) | + 0; + +} + +#define GEN7_CC_VIEWPORT_length 0x00000002 + +struct GEN7_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; +}; + +static inline void +GEN7_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN7_COLOR_CALC_STATE_length 0x00000006 + +struct GEN7_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define 
ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN7_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN7_DEPTH_STENCIL_STATE_length 0x00000003 + +struct GEN7_DEPTH_STENCIL_STATE { + bool StencilTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t StencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t StencilFailOp; + uint32_t StencilPassDepthFailOp; + uint32_t StencilPassDepthPassOp; + bool StencilBufferWriteEnable; + bool DoubleSidedStencilEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 
+#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t BackFaceStencilTestFunction; +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 1 +#define STENCILOP_REPLACE 2 +#define STENCILOP_INCRSAT 3 +#define STENCILOP_DECRSAT 4 +#define STENCILOP_INCR 5 +#define STENCILOP_DECR 6 +#define STENCILOP_INVERT 7 + uint32_t BackfaceStencilFailOp; + uint32_t BackfaceStencilPassDepthFailOp; + uint32_t BackfaceStencilPassDepthPassOp; + uint32_t StencilTestMask; + uint32_t StencilWriteMask; + uint32_t BackfaceStencilTestMask; + uint32_t BackfaceStencilWriteMask; + bool DepthTestEnable; +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + uint32_t DepthTestFunction; + bool DepthBufferWriteEnable; +}; + +static inline void +GEN7_DEPTH_STENCIL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_DEPTH_STENCIL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilTestEnable, 31, 31) | + __gen_field(values->StencilTestFunction, 28, 30) | + __gen_field(values->StencilFailOp, 25, 27) | + __gen_field(values->StencilPassDepthFailOp, 22, 24) | + __gen_field(values->StencilPassDepthPassOp, 19, 21) | + __gen_field(values->StencilBufferWriteEnable, 18, 18) | + __gen_field(values->DoubleSidedStencilEnable, 15, 15) | + __gen_field(values->BackFaceStencilTestFunction, 12, 14) | + __gen_field(values->BackfaceStencilFailOp, 9, 11) | + __gen_field(values->BackfaceStencilPassDepthFailOp, 6, 8) | + __gen_field(values->BackfaceStencilPassDepthPassOp, 3, 5) | + 0; + + dw[1] = + __gen_field(values->StencilTestMask, 24, 31) | + __gen_field(values->StencilWriteMask, 16, 23) | + 
__gen_field(values->BackfaceStencilTestMask, 8, 15) | + __gen_field(values->BackfaceStencilWriteMask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->DepthTestEnable, 31, 31) | + __gen_field(values->DepthTestFunction, 27, 29) | + __gen_field(values->DepthBufferWriteEnable, 26, 26) | + 0; + +} + +#define GEN7_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN7_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; +}; + +static inline void +GEN7_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[2] = + __gen_offset(values->SamplerStatePointer, 5, 31) | 
+ __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[3] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[4] = + __gen_field(values->ConstantURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[5] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 7) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + +} + +#define GEN7_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN7_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN7_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 5, 31) | + 0; + +} + +#define GEN7_RENDER_SURFACE_STATE_length 0x00000008 + +struct GEN7_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; + uint32_t SurfaceVerticalAlignment; +#define HALIGN_4 0 +#define HALIGN_8 1 + uint32_t SurfaceHorizontalAlignment; + uint32_t TiledSurface; +#define TILEWALK_XMAJOR 0 +#define TILEWALK_YMAJOR 1 + uint32_t TileWalk; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; +#define ARYSPC_FULL 0 +#define ARYSPC_LOD0 1 + uint32_t SurfaceArraySpacing; + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + uint32_t CubeFaceEnables; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define 
RTROTATE_0DEG 0 +#define RTROTATE_90DEG 1 +#define RTROTATE_270DEG 3 + uint32_t RenderTargetRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSFMT_MSS 0 +#define MSFMT_DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t MinimumArrayElement0; + uint32_t XOffset; + uint32_t YOffset; + struct GEN7_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + __gen_address_type MCSBaseAddress; + uint32_t MCSSurfacePitch; + __gen_address_type AppendCounterAddress; + bool AppendCounterEnable; + bool MCSEnable; + uint32_t XOffsetforUVPlane; + uint32_t YOffsetforUVPlane; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t RedClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t GreenClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t BlueClearColor; +#define CC_ZERO 0 +#define CC_ONE 1 + uint32_t AlphaClearColor; + float ResourceMinLOD; +}; + +static inline void +GEN7_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 15, 15) | + __gen_field(values->TiledSurface, 14, 14) | + __gen_field(values->TileWalk, 13, 13) | + __gen_field(values->VerticalLineStride, 12, 12) | + __gen_field(values->VerticalLineStrideOffset, 11, 11) | + __gen_field(values->SurfaceArraySpacing, 10, 10) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnables, 0, 
5) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->SurfaceBaseAddress, dw1); + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + /* Bits 0..26 take the separate MinimumArrayElement0 member. Packing MinimumArrayElement here a second time would OR it over the multisample fields in bits 0..6. */ + __gen_field(values->MinimumArrayElement0, 0, 26) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN7_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 20, 23) | + __gen_field(dw_SurfaceObjectControlState, 16, 19) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + /* dw6 ORs the MCS/append-counter bits with the UV-plane offsets, which occupy overlapping bit ranges; callers are presumably expected to fill only one set - TODO confirm. */ + uint32_t dw6 = + __gen_field(values->MCSSurfacePitch, 3, 11) | + __gen_field(values->AppendCounterEnable, 1, 1) | + __gen_field(values->MCSEnable, 0, 0) | + __gen_field(values->XOffsetforUVPlane, 16, 29) | + __gen_field(values->YOffsetforUVPlane, 0, 13) | + 0; + + dw[6] = + __gen_combine_address(data, &dw[6], values->AppendCounterAddress, dw6); + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + +} + +#define GEN7_SAMPLER_BORDER_COLOR_STATE_length 0x00000004 + +struct GEN7_SAMPLER_BORDER_COLOR_STATE { + float BorderColorRedDX100GL; + uint32_t BorderColorAlpha; + uint32_t BorderColorBlue; + uint32_t BorderColorGreen; + uint32_t BorderColorRedDX9; +
float BorderColorGreen0; + float BorderColorBlue0; + float BorderColorAlpha0; +}; + +/* Packs *values into the four dwords at dst. dw[0] ORs the float red value with the four 8-bit colour fields, which share that dword; dw[1], dw[2] and dw[3] hold the float green, blue and alpha values, i.e. the BorderColorGreen0/Blue0/Alpha0 members rather than the same-named uint32_t ones. */ +static inline void +GEN7_SAMPLER_BORDER_COLOR_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_BORDER_COLOR_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->BorderColorRedDX100GL) | + __gen_field(values->BorderColorAlpha, 24, 31) | + __gen_field(values->BorderColorBlue, 16, 23) | + __gen_field(values->BorderColorGreen, 8, 15) | + __gen_field(values->BorderColorRedDX9, 0, 7) | + 0; + + dw[1] = + __gen_float(values->BorderColorGreen0) | + 0; + + dw[2] = + __gen_float(values->BorderColorBlue0) | + 0; + + dw[3] = + __gen_float(values->BorderColorAlpha0) | + 0; + +} + +#define GEN7_SAMPLER_STATE_length 0x00000004 + +struct GEN7_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define OGL 1 + uint32_t LODPreClampEnable; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t BorderColorPointer; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK
1 + uint32_t ChromaKeyMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool RAddressMagFilterRoundingEnable; + bool VAddressMinFilterRoundingEnable; + bool VAddressMagFilterRoundingEnable; + bool UAddressMinFilterRoundingEnable; + bool UAddressMagFilterRoundingEnable; +#define FULL 0 +#define MED 2 +#define LOW 3 + uint32_t TrilinearFilterQuality; + bool NonnormalizedCoordinateEnable; + uint32_t TCXAddressControlMode; + uint32_t TCYAddressControlMode; + uint32_t TCZAddressControlMode; +}; + +static inline void +GEN7_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN7_SAMPLER_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SamplerDisable, 31, 31) | + __gen_field(values->TextureBorderColorMode, 29, 29) | + __gen_field(values->LODPreClampEnable, 28, 28) | + __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) | + __gen_field(values->MipModeFilter, 20, 21) | + __gen_field(values->MagModeFilter, 17, 19) | + __gen_field(values->MinModeFilter, 14, 16) | + __gen_field(values->TextureLODBias, 1, 13) | + __gen_field(values->AnisotropicAlgorithm, 0, 0) | + 0; + + dw[1] = + __gen_field(values->MinLOD * (1 << 8), 20, 31) | + __gen_field(values->MaxLOD * (1 << 8), 8, 19) | + __gen_field(values->ShadowFunction, 1, 3) | + __gen_field(values->CubeSurfaceControlMode, 0, 0) | + 0; + + dw[2] = + __gen_offset(values->BorderColorPointer, 5, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyEnable, 25, 25) | + __gen_field(values->ChromaKeyIndex, 23, 24) | + __gen_field(values->ChromaKeyMode, 22, 22) | + __gen_field(values->MaximumAnisotropy, 19, 21) | + __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) | + __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) | + 
__gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) | + __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) | + __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) | + __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) | + __gen_field(values->TrilinearFilterQuality, 11, 12) | + __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) | + __gen_field(values->TCXAddressControlMode, 6, 8) | + __gen_field(values->TCYAddressControlMode, 3, 5) | + __gen_field(values->TCZAddressControlMode, 0, 2) | + 0; + +} + +/* Enum 3D_Prim_Topo_Type */ +#define _3DPRIM_POINTLIST 1 +#define _3DPRIM_LINELIST 2 +#define _3DPRIM_LINESTRIP 3 +#define _3DPRIM_TRILIST 4 +#define _3DPRIM_TRISTRIP 5 +#define _3DPRIM_TRIFAN 6 +#define _3DPRIM_QUADLIST 7 +#define _3DPRIM_QUADSTRIP 8 +#define _3DPRIM_LINELIST_ADJ 9 +#define _3DPRIM_LINESTRIP_ADJ 10 +#define _3DPRIM_TRILIST_ADJ 11 +#define _3DPRIM_TRISTRIP_ADJ 12 +#define _3DPRIM_TRISTRIP_REVERSE 13 +#define _3DPRIM_POLYGON 14 +#define _3DPRIM_RECTLIST 15 +#define _3DPRIM_LINELOOP 16 +#define _3DPRIM_POINTLIST_BF 17 +#define _3DPRIM_LINESTRIP_CONT 18 +#define _3DPRIM_LINESTRIP_BF 19 +#define _3DPRIM_LINESTRIP_CONT_BF 20 +#define _3DPRIM_TRIFAN_NOSTIPPLE 22 +#define _3DPRIM_PATCHLIST_1 32 +#define _3DPRIM_PATCHLIST_2 33 +#define _3DPRIM_PATCHLIST_3 34 +#define _3DPRIM_PATCHLIST_4 35 +#define _3DPRIM_PATCHLIST_5 36 +#define _3DPRIM_PATCHLIST_6 37 +#define _3DPRIM_PATCHLIST_7 38 +#define _3DPRIM_PATCHLIST_8 39 +#define _3DPRIM_PATCHLIST_9 40 +#define _3DPRIM_PATCHLIST_10 41 +#define _3DPRIM_PATCHLIST_11 42 +#define _3DPRIM_PATCHLIST_12 43 +#define _3DPRIM_PATCHLIST_13 44 +#define _3DPRIM_PATCHLIST_14 45 +#define _3DPRIM_PATCHLIST_15 46 +#define _3DPRIM_PATCHLIST_16 47 +#define _3DPRIM_PATCHLIST_17 48 +#define _3DPRIM_PATCHLIST_18 49 +#define _3DPRIM_PATCHLIST_19 50 +#define _3DPRIM_PATCHLIST_20 51 +#define _3DPRIM_PATCHLIST_21 52 +#define _3DPRIM_PATCHLIST_22 53 +#define _3DPRIM_PATCHLIST_23 54 +#define 
_3DPRIM_PATCHLIST_24 55 +#define _3DPRIM_PATCHLIST_25 56 +#define _3DPRIM_PATCHLIST_26 57 +#define _3DPRIM_PATCHLIST_27 58 +#define _3DPRIM_PATCHLIST_28 59 +#define _3DPRIM_PATCHLIST_29 60 +#define _3DPRIM_PATCHLIST_30 61 +#define _3DPRIM_PATCHLIST_31 62 +#define _3DPRIM_PATCHLIST_32 63 + +/* Enum 3D_Vertex_Component_Control */ +#define VFCOMP_NOSTORE 0 +#define VFCOMP_STORE_SRC 1 +#define VFCOMP_STORE_0 2 +#define VFCOMP_STORE_1_FP 3 +#define VFCOMP_STORE_1_INT 4 +#define VFCOMP_STORE_VID 5 +#define VFCOMP_STORE_IID 6 +#define VFCOMP_STORE_PID 7 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 
+#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 
+#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 
+#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 + diff --git a/src/vulkan/gen8_pack.h b/src/vulkan/gen8_pack.h new file mode 100644 index 00000000000..4d6b7c0a04d --- /dev/null +++ b/src/vulkan/gen8_pack.h @@ -0,0 +1,9179 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +/* Instructions, enums and structures for BDW. + * + * This file has been generated, do not hand edit. + */ + +#pragma once + +#include <stdio.h> +#include <assert.h> + +#ifndef __gen_validate_value +#define __gen_validate_value(x) +#endif + +#ifndef __gen_field_functions +#define __gen_field_functions + +/* Lets __gen_float() read the bit pattern of a float as a 32-bit integer. */ +union __gen_value { + float f; + uint32_t dw; +}; + +/* Returns a 64-bit mask with bits start..end (inclusive) set. The all-ones constant is unsigned long long so the shifts operate on 64 bits even where unsigned long is only 32 bits wide. */ +static inline uint64_t +__gen_mbo(uint32_t start, uint32_t end) +{ + return (~0ull >> (64 - (end - start + 1))) << start; +} + +/* Returns v shifted left to bit position start. When DEBUG is set, asserts that v fits in the end - start + 1 bits of the field. */ +static inline uint64_t +__gen_field(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + if (end - start + 1 < 64) + assert(v < 1ull << (end - start + 1)); +#endif + + return v << start; +} + +/* Returns v unchanged (no shift is applied). When DEBUG is set, asserts that v has no bits set outside start..end. */ +static inline uint64_t +__gen_offset(uint64_t v, uint32_t start, uint32_t end) +{ + __gen_validate_value(v); +#if DEBUG + uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start; + + assert((v & ~mask) == 0); +#endif + + return v; +} + +/* Returns the 32-bit representation of v, read back through union __gen_value. */ +static inline uint32_t +__gen_float(float v) +{ + __gen_validate_value(v); + return ((union __gen_value) { .f = (v) }).dw; +} + +#ifndef __gen_address_type +#error #define __gen_address_type before including this file +#endif + +#ifndef __gen_user_data +#error #define __gen_user_data before including this file +#endif + +#endif + +#define GEN8_3DSTATE_URB_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_URB_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 48, \ +
.DwordLength = 0 + +#define GEN8_3DSTATE_URB_VS_length 0x00000002 + +struct GEN8_3DSTATE_URB_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t VSURBStartingAddress; + uint32_t VSURBEntryAllocationSize; + uint32_t VSNumberofURBEntries; +}; + +static inline void +GEN8_3DSTATE_URB_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_URB_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->VSURBStartingAddress, 25, 31) | + __gen_field(values->VSURBEntryAllocationSize, 16, 24) | + __gen_field(values->VSNumberofURBEntries, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 16, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_VS_length 0x00000009 + +struct GEN8_3DSTATE_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleVertexDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; 
+ uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool VertexCacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleVertexDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 23, 31) | + 
__gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 2, 2) | + __gen_field(values->VertexCacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_GPGPU_CSR_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN8_GPGPU_CSR_BASE_ADDRESS_length 0x00000003 + +struct GEN8_GPGPU_CSR_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GPGPUCSRBaseAddress; +}; + +static inline void +GEN8_GPGPU_CSR_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GPGPU_CSR_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GPGPUCSRBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_MI_ATOMIC_length_bias 0x00000002 +#define GEN8_MI_ATOMIC_header \ + .CommandType = 0, \ + .MICommandOpcode = 47 + +#define GEN8_MI_ATOMIC_length 0x00000003 + +struct GEN8_MI_ATOMIC { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; + uint32_t PostSyncOperation; 
+#define DWORD 0 +#define QWORD 1 +#define OCTWORD 2 +#define RESERVED 3 + uint32_t DataSize; + uint32_t InlineData; + uint32_t CSSTALL; + uint32_t ReturnDataControl; + uint32_t ATOMICOPCODE; + uint32_t DwordLength; + __gen_address_type MemoryAddress; + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; +}; + +static inline void +GEN8_MI_ATOMIC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ATOMIC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->DataSize, 19, 20) | + __gen_field(values->InlineData, 18, 18) | + __gen_field(values->CSSTALL, 17, 17) | + __gen_field(values->ReturnDataControl, 16, 16) | + __gen_field(values->ATOMICOPCODE, 8, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->MemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->Operand1DataDword0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->Operand2DataDword0, 0, 31) | + 0; + + dw[5] = + __gen_field(values->Operand1DataDword1, 0, 31) | + 0; + + dw[6] = + __gen_field(values->Operand2DataDword1, 0, 31) | + 0; + + dw[7] = + __gen_field(values->Operand1DataDword2, 0, 31) | + 0; + + dw[8] = + __gen_field(values->Operand2DataDword2, 0, 31) | + 0; + + dw[9] = + __gen_field(values->Operand1DataDword3, 0, 31) | + 0; + + dw[10] = + __gen_field(values->Operand2DataDword3, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_REG_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_REG_header \ + .CommandType = 0, \ + 
.MICommandOpcode = 42, \ + .DwordLength = 1 + +#define GEN8_MI_LOAD_REGISTER_REG_length 0x00000003 + +struct GEN8_MI_LOAD_REGISTER_REG { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t SourceRegisterAddress; + uint32_t DestinationRegisterAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_REG_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_REG * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->SourceRegisterAddress, 2, 22) | + 0; + + dw[2] = + __gen_offset(values->DestinationRegisterAddress, 2, 22) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_SIGNAL_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_SIGNAL_header \ + .CommandType = 0, \ + .MICommandOpcode = 27, \ + .DwordLength = 0 + +#define GEN8_MI_SEMAPHORE_SIGNAL_length 0x00000002 + +struct GEN8_MI_SEMAPHORE_SIGNAL { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t PostSyncOperation; +#define RCS 0 +#define VCS0 1 +#define BCS 2 +#define VECS 3 +#define VCS1 4 + uint32_t TargetEngineSelect; + uint32_t DwordLength; + uint32_t TargetContextID; +}; + +static inline void +GEN8_MI_SEMAPHORE_SIGNAL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_SIGNAL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PostSyncOperation, 21, 21) | + __gen_field(values->TargetEngineSelect, 15, 17) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->TargetContextID, 0, 31) | + 0; + +} + +#define GEN8_MI_SEMAPHORE_WAIT_length_bias 0x00000002 +#define GEN8_MI_SEMAPHORE_WAIT_header \ + .CommandType = 0, \ + .MICommandOpcode = 28, \ + 
.DwordLength = 2 + +#define GEN8_MI_SEMAPHORE_WAIT_length 0x00000004 + +struct GEN8_MI_SEMAPHORE_WAIT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t MemoryType; +#define PollingMode 1 +#define SignalMode 0 + uint32_t WaitMode; +#define SAD_GREATER_THAN_SDD 0 +#define SAD_GREATER_THAN_OR_EQUAL_SDD 1 +#define SAD_LESS_THAN_SDD 2 +#define SAD_LESS_THAN_OR_EQUAL_SDD 3 +#define SAD_EQUAL_SDD 4 +#define SAD_NOT_EQUAL_SDD 5 + uint32_t CompareOperation; + uint32_t DwordLength; + uint32_t SemaphoreDataDword; + __gen_address_type SemaphoreAddress; +}; + +static inline void +GEN8_MI_SEMAPHORE_WAIT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SEMAPHORE_WAIT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->MemoryType, 22, 22) | + __gen_field(values->WaitMode, 15, 15) | + __gen_field(values->CompareOperation, 12, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SemaphoreDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SemaphoreAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_STORE_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 36, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_REGISTER_MEM_length 0x00000004 + +struct GEN8_MI_STORE_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t PredicateEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_REGISTER_MEM * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->PredicateEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_PIPELINE_SELECT_length_bias 0x00000001 +#define GEN8_PIPELINE_SELECT_header \ + .CommandType = 3, \ + .CommandSubType = 1, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4 + +#define GEN8_PIPELINE_SELECT_length 0x00000001 + +struct GEN8_PIPELINE_SELECT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define _3D 0 +#define Media 1 +#define GPGPU 2 + uint32_t PipelineSelection; +}; + +static inline void +GEN8_PIPELINE_SELECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPELINE_SELECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->PipelineSelection, 0, 1) | + 0; + +} + +#define GEN8_STATE_BASE_ADDRESS_length_bias 0x00000002 +#define GEN8_STATE_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 1, \ + .DwordLength = 14 + +#define GEN8_STATE_BASE_ADDRESS_length 0x00000010 + +#define GEN8_MEMORY_OBJECT_CONTROL_STATE_length 0x00000001 + +struct GEN8_MEMORY_OBJECT_CONTROL_STATE { +#define UCwithFenceifcoherentcycle 0 +#define UCUncacheable 1 +#define WT 2 +#define WB 3 + uint32_t MemoryTypeLLCeLLCCacheabilityControl; +#define 
eLLCOnlywheneDRAMispresentelsegetsallocatedinLLC 0 +#define LLCOnly 1 +#define LLCeLLCAllowed 2 +#define L3DefertoPATforLLCeLLCselection 3 + uint32_t TargetCache; + uint32_t AgeforQUADLRU; +}; + +static inline void +GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEMORY_OBJECT_CONTROL_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->MemoryTypeLLCeLLCCacheabilityControl, 5, 6) | + __gen_field(values->TargetCache, 3, 4) | + __gen_field(values->AgeforQUADLRU, 0, 1) | + 0; + +} + +struct GEN8_STATE_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GeneralStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE GeneralStateMemoryObjectControlState; + bool GeneralStateBaseAddressModifyEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StatelessDataPortAccessMemoryObjectControlState; + __gen_address_type SurfaceStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceStateMemoryObjectControlState; + bool SurfaceStateBaseAddressModifyEnable; + __gen_address_type DynamicStateBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DynamicStateMemoryObjectControlState; + bool DynamicStateBaseAddressModifyEnable; + __gen_address_type IndirectObjectBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE IndirectObjectMemoryObjectControlState; + bool IndirectObjectBaseAddressModifyEnable; + __gen_address_type InstructionBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE InstructionMemoryObjectControlState; + bool InstructionBaseAddressModifyEnable; + uint32_t GeneralStateBufferSize; + bool GeneralStateBufferSizeModifyEnable; + uint32_t DynamicStateBufferSize; + bool DynamicStateBufferSizeModifyEnable; + uint32_t IndirectObjectBufferSize; + bool IndirectObjectBufferSizeModifyEnable; + uint32_t InstructionBufferSize; + bool 
InstructionBuffersizeModifyEnable; +}; + +static inline void +GEN8_STATE_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_GeneralStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_GeneralStateMemoryObjectControlState, &values->GeneralStateMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_GeneralStateMemoryObjectControlState, 4, 10) | + __gen_field(values->GeneralStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw_StatelessDataPortAccessMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StatelessDataPortAccessMemoryObjectControlState, &values->StatelessDataPortAccessMemoryObjectControlState); + dw[3] = + __gen_field(dw_StatelessDataPortAccessMemoryObjectControlState, 16, 22) | + 0; + + uint32_t dw_SurfaceStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceStateMemoryObjectControlState, &values->SurfaceStateMemoryObjectControlState); + uint32_t dw4 = + __gen_field(dw_SurfaceStateMemoryObjectControlState, 4, 10) | + __gen_field(values->SurfaceStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw_DynamicStateMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DynamicStateMemoryObjectControlState, &values->DynamicStateMemoryObjectControlState); + uint32_t dw6 = + 
__gen_field(dw_DynamicStateMemoryObjectControlState, 4, 10) | + __gen_field(values->DynamicStateBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw_IndirectObjectMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_IndirectObjectMemoryObjectControlState, &values->IndirectObjectMemoryObjectControlState); + uint32_t dw8 = + __gen_field(dw_IndirectObjectMemoryObjectControlState, 4, 10) | + __gen_field(values->IndirectObjectBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw_InstructionMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_InstructionMemoryObjectControlState, &values->InstructionMemoryObjectControlState); + uint32_t dw10 = + __gen_field(dw_InstructionMemoryObjectControlState, 4, 10) | + __gen_field(values->InstructionBaseAddressModifyEnable, 0, 0) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + __gen_field(values->GeneralStateBufferSize, 12, 31) | + __gen_field(values->GeneralStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[13] = + __gen_field(values->DynamicStateBufferSize, 12, 31) | + __gen_field(values->DynamicStateBufferSizeModifyEnable, 0, 0) | + 0; + + dw[14] = + __gen_field(values->IndirectObjectBufferSize, 12, 31) | + __gen_field(values->IndirectObjectBufferSizeModifyEnable, 0, 0) | + 0; + + dw[15] = + __gen_field(values->InstructionBufferSize, 12, 31) | + __gen_field(values->InstructionBuffersizeModifyEnable, 0, 0) | + 0; + +} + +#define GEN8_STATE_PREFETCH_length_bias 0x00000002 +#define GEN8_STATE_PREFETCH_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 3, \ 
+ .DwordLength = 0 + +#define GEN8_STATE_PREFETCH_length 0x00000002 + +struct GEN8_STATE_PREFETCH { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type PrefetchPointer; + uint32_t PrefetchCount; +}; + +static inline void +GEN8_STATE_PREFETCH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_PREFETCH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->PrefetchCount, 0, 2) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->PrefetchPointer, dw1); + +} + +#define GEN8_STATE_SIP_length_bias 0x00000002 +#define GEN8_STATE_SIP_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2, \ + .DwordLength = 1 + +#define GEN8_STATE_SIP_length 0x00000003 + +struct GEN8_STATE_SIP { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t SystemInstructionPointer; +}; + +static inline void +GEN8_STATE_SIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_STATE_SIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->SystemInstructionPointer, 4, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_SWTESS_BASE_ADDRESS_length_bias 0x00000002 +#define 
GEN8_SWTESS_BASE_ADDRESS_header \ + .CommandType = 3, \ + .CommandSubType = 0, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 3, \ + .DwordLength = 0 + +#define GEN8_SWTESS_BASE_ADDRESS_length 0x00000002 + +struct GEN8_SWTESS_BASE_ADDRESS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type SWTessellationBaseAddress; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SWTessellationMemoryObjectControlState; +}; + +static inline void +GEN8_SWTESS_BASE_ADDRESS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SWTESS_BASE_ADDRESS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SWTessellationMemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SWTessellationMemoryObjectControlState, &values->SWTessellationMemoryObjectControlState); + uint32_t dw1 = + __gen_field(dw_SWTessellationMemoryObjectControlState, 8, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->SWTessellationBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_3DPRIMITIVE_length_bias 0x00000002 +#define GEN8_3DPRIMITIVE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 3, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 5 + +#define GEN8_3DPRIMITIVE_length 0x00000007 + +struct GEN8_3DPRIMITIVE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + bool IndirectParameterEnable; + uint32_t UAVCoherencyRequired; + bool PredicateEnable; + uint32_t DwordLength; + bool EndOffsetEnable; +#define SEQUENTIAL 0 +#define RANDOM 1 + uint32_t 
VertexAccessType; + uint32_t PrimitiveTopologyType; + uint32_t VertexCountPerInstance; + uint32_t StartVertexLocation; + uint32_t InstanceCount; + uint32_t StartInstanceLocation; + uint32_t BaseVertexLocation; +}; + +static inline void +GEN8_3DPRIMITIVE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DPRIMITIVE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->IndirectParameterEnable, 10, 10) | + __gen_field(values->UAVCoherencyRequired, 9, 9) | + __gen_field(values->PredicateEnable, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->EndOffsetEnable, 9, 9) | + __gen_field(values->VertexAccessType, 8, 8) | + __gen_field(values->PrimitiveTopologyType, 0, 5) | + 0; + + dw[2] = + __gen_field(values->VertexCountPerInstance, 0, 31) | + 0; + + dw[3] = + __gen_field(values->StartVertexLocation, 0, 31) | + 0; + + dw[4] = + __gen_field(values->InstanceCount, 0, 31) | + 0; + + dw[5] = + __gen_field(values->StartInstanceLocation, 0, 31) | + 0; + + dw[6] = + __gen_field(values->BaseVertexLocation, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length_bias 0x00000002 +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_AA_LINE_PARAMETERS_length 0x00000003 + +struct GEN8_3DSTATE_AA_LINE_PARAMETERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float AAPointCoverageBias; + float AACoverageBias; + float AAPointCoverageSlope; + float AACoverageSlope; + float AAPointCoverageEndCapBias; + float AACoverageEndCapBias; + float 
AAPointCoverageEndCapSlope; + float AACoverageEndCapSlope; +}; + +static inline void +GEN8_3DSTATE_AA_LINE_PARAMETERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_AA_LINE_PARAMETERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AAPointCoverageBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageSlope * (1 << 8), 0, 7) | + 0; + + dw[2] = + __gen_field(values->AAPointCoverageEndCapBias * (1 << 8), 24, 31) | + __gen_field(values->AACoverageEndCapBias * (1 << 8), 16, 23) | + __gen_field(values->AAPointCoverageEndCapSlope * (1 << 8), 8, 15) | + __gen_field(values->AACoverageEndCapSlope * (1 << 8), 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 70 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_length 0x00000000 + +#define GEN8_BINDING_TABLE_EDIT_ENTRY_length 0x00000001 + +struct GEN8_BINDING_TABLE_EDIT_ENTRY { + uint32_t BindingTableIndex; + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_EDIT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_EDIT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BindingTableIndex, 16, 23) | + __gen_offset(values->SurfaceStatePointer, 0, 15) | + 0; + +} + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 68 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | 
+ 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 69 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 71 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 
2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 67 + +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_length 0x00000000 + +struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BindingTableBlockClear; +#define AllCores 3 +#define Core1 2 +#define Core0 1 + uint32_t BindingTableEditTarget; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_EDIT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_EDIT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->BindingTableBlockClear, 16, 31) | + __gen_field(values->BindingTableEditTarget, 0, 1) | + 0; + + /* 
variable length fields follow */ +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 40, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 41, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 39, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 42, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSBindingTable; +}; + +static inline void 
+GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 38, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_length 0x00000002 + +struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSBindingTable; +}; + +static inline void +GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSBindingTable, 5, 15) | + 0; + +} + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_length 0x00000004 + +struct 
GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC { + /* Unpacked field view of the command; the first five members are the command header fields that the packer places in dw[0]. */ uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type BindingTablePoolBaseAddress; + uint32_t BindingTablePoolEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SurfaceObjectControlState; +#define NoValidData 0 + /* NoValidData is presumably a named value for the size field below; TODO confirm against the hardware documentation. */ uint32_t BindingTablePoolBufferSize; +}; + /* Packs *values into dst as four dwords. dw[0] is the command header. The enable bit and the packed SurfaceObjectControlState are gathered into dw1 and handed to __gen_combine_address() together with the base address; the 64-bit result is stored low dword first in dw[1] and dw[2]. dw[3] is built from the buffer size. __gen_combine_address is defined outside this chunk; it presumably merges dw1 into the low bits of the resolved address, to be confirmed there. */ +static inline void +GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SurfaceObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SurfaceObjectControlState, &values->SurfaceObjectControlState); + uint32_t dw1 = + __gen_field(values->BindingTablePoolEnable, 11, 11) | + __gen_field(dw_SurfaceObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->BindingTablePoolBufferSize, 12, 31) | + 0; + +} + /* 3DSTATE_BLEND_STATE_POINTERS: the _header macro expands to designated initializers for the command header fields (sub-opcode 36); _length is 2 dwords. */ +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 36, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_BLEND_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_BLEND_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t BlendStatePointer; + bool BlendStatePointerValid; +}; + +static inline void 
+GEN8_3DSTATE_BLEND_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_BLEND_STATE_POINTERS * restrict values) +{ + /* Writes two dwords to dst: dw[0] is the command header; dw[1] combines BlendStatePointer (via __gen_offset) with the BlendStatePointerValid flag. The data argument is not referenced in this packer. */ uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->BlendStatePointer, 6, 31) | + __gen_field(values->BlendStatePointerValid, 0, 0) | + 0; + +} + /* 3DSTATE_CC_STATE_POINTERS: the _header macro expands to designated initializers for the command header fields (sub-opcode 14); _length is 2 dwords. */ +#define GEN8_3DSTATE_CC_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_CC_STATE_POINTERS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 14, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_CC_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_CC_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ColorCalcStatePointer; + bool ColorCalcStatePointerValid; +}; + /* Packs *values into dst as two dwords: dw[0] is the command header; dw[1] combines ColorCalcStatePointer (via __gen_offset) with the ColorCalcStatePointerValid flag. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_CC_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CC_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ColorCalcStatePointer, 6, 31) | + __gen_field(values->ColorCalcStatePointerValid, 0, 0) | + 0; + +} + /* 3DSTATE_CHROMA_KEY: the _header macro defaults DwordLength to 2 (opcode 1, sub-opcode 4). */ +#define GEN8_3DSTATE_CHROMA_KEY_length_bias 0x00000002 +#define GEN8_3DSTATE_CHROMA_KEY_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 2 + +#define 
GEN8_3DSTATE_CHROMA_KEY_length 0x00000004 + +struct GEN8_3DSTATE_CHROMA_KEY { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ChromaKeyTableIndex; + uint32_t ChromaKeyLowValue; + uint32_t ChromaKeyHighValue; +}; + /* Packs *values into dst as four dwords: dw[0] is the command header, dw[1] is built from the table index, dw[2] from the low key value and dw[3] from the high key value. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_CHROMA_KEY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CHROMA_KEY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ChromaKeyTableIndex, 30, 31) | + 0; + + dw[2] = + __gen_field(values->ChromaKeyLowValue, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ChromaKeyHighValue, 0, 31) | + 0; + +} + /* 3DSTATE_CLEAR_PARAMS: _length is 3 dwords; the _header macro defaults DwordLength to 1 (sub-opcode 4). */ +#define GEN8_3DSTATE_CLEAR_PARAMS_length_bias 0x00000002 +#define GEN8_3DSTATE_CLEAR_PARAMS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 4, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_CLEAR_PARAMS_length 0x00000003 + +struct GEN8_3DSTATE_CLEAR_PARAMS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float DepthClearValue; + bool DepthClearValueValid; +}; + /* Packs *values into dst as three dwords: dw[0] is the command header, dw[1] is the float DepthClearValue passed through __gen_float, dw[2] is built from the DepthClearValueValid flag. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_CLEAR_PARAMS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLEAR_PARAMS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + 
__gen_float(values->DepthClearValue) | + 0; + + dw[2] = + __gen_field(values->DepthClearValueValid, 0, 0) | + 0; + +} + /* 3DSTATE_CLIP: _length is 4 dwords; the _header macro defaults DwordLength to 2 (sub-opcode 18). */ +#define GEN8_3DSTATE_CLIP_length_bias 0x00000002 +#define GEN8_3DSTATE_CLIP_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_CLIP_length 0x00000004 + +struct GEN8_3DSTATE_CLIP { + /* Unpacked field view of the command. The #define lines interleaved with the members name values for the member that follows them; being preprocessor macros they are not scoped to this struct, and Normal/Force are re-defined several times with identical values. */ uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceCullTestEnableBitmask; +#define _8Bit 0 +#define _4Bit 1 + uint32_t VertexSubPixelPrecisionSelect; + bool EarlyCullEnable; +#define Normal 0 +#define Force 1 + bool ForceUserClipDistanceClipTestEnableBitmask; +#define Normal 0 +#define Force 1 + bool ForceClipMode; + bool ClipperStatisticsEnable; + uint32_t UserClipDistanceCullTestEnableBitmask; + bool ClipEnable; +#define API_OGL 0 + uint32_t APIMode; + bool ViewportXYClipTestEnable; + bool GuardbandClipTestEnable; + uint32_t UserClipDistanceClipTestEnableBitmask; +#define NORMAL 0 +#define REJECT_ALL 3 +#define ACCEPT_ALL 4 + uint32_t ClipMode; + bool PerspectiveDivideDisable; + bool NonPerspectiveBarycentricEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; + float MinimumPointWidth; + float MaximumPointWidth; + bool ForceZeroRTAIndexEnable; + uint32_t MaximumVPIndex; +}; + /* Packs *values into dst as four dwords: dw[0] is the command header, dw[1] and dw[2] hold the clip control flags and bitmasks, dw[3] holds the point-width limits, ForceZeroRTAIndexEnable and MaximumVPIndex. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_CLIP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CLIP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = 
+ __gen_field(values->ForceUserClipDistanceCullTestEnableBitmask, 20, 20) | + __gen_field(values->VertexSubPixelPrecisionSelect, 19, 19) | + __gen_field(values->EarlyCullEnable, 18, 18) | + __gen_field(values->ForceUserClipDistanceClipTestEnableBitmask, 17, 17) | + __gen_field(values->ForceClipMode, 16, 16) | + __gen_field(values->ClipperStatisticsEnable, 10, 10) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + + dw[2] = + __gen_field(values->ClipEnable, 31, 31) | + __gen_field(values->APIMode, 30, 30) | + __gen_field(values->ViewportXYClipTestEnable, 28, 28) | + __gen_field(values->GuardbandClipTestEnable, 26, 26) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 16, 23) | + __gen_field(values->ClipMode, 13, 15) | + __gen_field(values->PerspectiveDivideDisable, 9, 9) | + __gen_field(values->NonPerspectiveBarycentricEnable, 8, 8) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 4, 5) | + __gen_field(values->LineStripListProvokingVertexSelect, 2, 3) | + __gen_field(values->TriangleFanProvokingVertexSelect, 0, 1) | + 0; + /* The float point widths are multiplied by (1 << 3) before being handed to __gen_field; presumably a fixed-point encoding with 3 fractional bits, TODO confirm against the hardware documentation. */ + dw[3] = + __gen_field(values->MinimumPointWidth * (1 << 3), 17, 27) | + __gen_field(values->MaximumPointWidth * (1 << 3), 6, 16) | + __gen_field(values->ForceZeroRTAIndexEnable, 5, 5) | + __gen_field(values->MaximumVPIndex, 0, 3) | + 0; + +} + /* 3DSTATE_CONSTANT_DS: _length is 11 dwords (one header dword followed by the 10-dword constant body); the _header macro defaults DwordLength to 9 (sub-opcode 26). */ +#define GEN8_3DSTATE_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_DS_length 0x0000000b + +#define GEN8_3DSTATE_CONSTANT_BODY_length 0x0000000a + /* Constant-buffer payload embedded as ConstantBody in each 3DSTATE_CONSTANT_xS struct: four read lengths and four buffer addresses. */ +struct GEN8_3DSTATE_CONSTANT_BODY { + uint32_t ConstantBuffer1ReadLength; + uint32_t ConstantBuffer0ReadLength; + uint32_t ConstantBuffer3ReadLength; + uint32_t ConstantBuffer2ReadLength; + __gen_address_type PointerToConstantBuffer0; + __gen_address_type PointerToConstantBuffer1; + __gen_address_type 
PointerToConstantBuffer2; + __gen_address_type PointerToConstantBuffer3; +}; + /* Packs the constant body into 10 dwords at dst. dw[0] holds read lengths 1 and 0, dw[1] holds read lengths 3 and 2. Each PointerToConstantBufferN is passed through __gen_combine_address() with a zero extra dword, and the 64-bit result is stored low dword first in dw[2..3], dw[4..5], dw[6..7] and dw[8..9]. */ +static inline void +GEN8_3DSTATE_CONSTANT_BODY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_BODY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ConstantBuffer1ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer0ReadLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ConstantBuffer3ReadLength, 16, 31) | + __gen_field(values->ConstantBuffer2ReadLength, 0, 15) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->PointerToConstantBuffer0, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint32_t dw4 = + 0; + + uint64_t qw4 = + __gen_combine_address(data, &dw[4], values->PointerToConstantBuffer1, dw4); + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + uint32_t dw6 = + 0; + + uint64_t qw6 = + __gen_combine_address(data, &dw[6], values->PointerToConstantBuffer2, dw6); + + dw[6] = qw6; + dw[7] = qw6 >> 32; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->PointerToConstantBuffer3, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + +} + /* Unpacked field view of 3DSTATE_CONSTANT_DS: command header fields, a memory object control state that the packer folds into dw[0], and the shared constant body. */ +struct GEN8_3DSTATE_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + /* Packs *values into dst: the control state is first packed into a scratch dword, then merged into the command header in dw[0]; the constant body is packed starting at dw[1]. */ +static inline void +GEN8_3DSTATE_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + /* The shared constant body occupies the dwords after the header, so it is packed starting at dw[1]. */ + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + /* 3DSTATE_CONSTANT_GS: same layout as the other 3DSTATE_CONSTANT_xS commands in this file; the _header macro uses sub-opcode 22 and defaults DwordLength to 9. */ +#define GEN8_3DSTATE_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_GS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + /* Packs *values into dst: the control state is first packed into a scratch dword, then merged into the command header in dw[0]; the constant body is packed starting at dw[1]. */ +static inline void +GEN8_3DSTATE_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + /* 3DSTATE_CONSTANT_HS: the _header macro uses sub-opcode 25 and defaults DwordLength to 9; _length is 11 dwords. */ +#define GEN8_3DSTATE_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 25, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_HS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_HS { + uint32_t CommandType; + uint32_t 
CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + /* Packs *values into dst: the control state is first packed into a scratch dword, then merged into the command header in dw[0]; the constant body is packed starting at dw[1]. */ +static inline void +GEN8_3DSTATE_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + /* 3DSTATE_CONSTANT_PS: the _header macro uses sub-opcode 23 and defaults DwordLength to 9; _length is 11 dwords. */ +#define GEN8_3DSTATE_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 23, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_PS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + /* Packs *values into dst: the control state is first packed into a scratch dword, then merged into the command header in dw[0]; the constant body is packed starting at dw[1]. */ +static inline void +GEN8_3DSTATE_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + 
__gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + /* The shared constant body occupies the dwords after the header, so it is packed starting at dw[1]. */ + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + /* 3DSTATE_CONSTANT_VS: the _header macro uses sub-opcode 21 and defaults DwordLength to 9; _length is 11 dwords. */ +#define GEN8_3DSTATE_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_CONSTANT_VS_length 0x0000000b + +struct GEN8_3DSTATE_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE ConstantBufferObjectControlState; + uint32_t DwordLength; + struct GEN8_3DSTATE_CONSTANT_BODY ConstantBody; +}; + /* Packs *values into dst: the control state is first packed into a scratch dword, then merged into the command header in dw[0]; the constant body is packed starting at dw[1]. */ +static inline void +GEN8_3DSTATE_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_ConstantBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_ConstantBufferObjectControlState, &values->ConstantBufferObjectControlState); + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(dw_ConstantBufferObjectControlState, 8, 14) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + GEN8_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody); +} + /* 3DSTATE_DEPTH_BUFFER: the _header macro uses sub-opcode 5 and defaults DwordLength to 6. */ +#define GEN8_3DSTATE_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 5, \ + .DwordLength = 6 + +#define 
GEN8_3DSTATE_DEPTH_BUFFER_length 0x00000008 + +struct GEN8_3DSTATE_DEPTH_BUFFER { + /* Unpacked field view of the command. The SURFTYPE_* macros precede SurfaceType and the D* format macros precede SurfaceFormat, so they presumably name the valid values of those members; as preprocessor macros they are not scoped to this struct. */ uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool DepthWriteEnable; + bool StencilWriteEnable; + bool HierarchicalDepthBufferEnable; +#define D32_FLOAT 1 +#define D24_UNORM_X8_UINT 3 +#define D16_UNORM 5 + uint32_t SurfaceFormat; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t Height; + uint32_t Width; + uint32_t LOD; + uint32_t Depth; + uint32_t MinimumArrayElement; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE DepthBufferObjectControlState; + uint32_t RenderTargetViewExtent; + uint32_t SurfaceQPitch; +}; + /* Packs *values into dst as eight dwords: dw[0] is the command header, dw[1] holds surface type, write enables, format and pitch, dw[2..3] hold the 64-bit result of __gen_combine_address() for the base address (low dword first), dw[4] holds height, width and LOD, dw[5] holds depth, minimum array element and the packed control state, dw[6] is written as zero, and dw[7] holds the view extent and QPitch. */ +static inline void +GEN8_3DSTATE_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->DepthWriteEnable, 28, 28) | + __gen_field(values->StencilWriteEnable, 27, 27) | + __gen_field(values->HierarchicalDepthBufferEnable, 22, 22) | + __gen_field(values->SurfaceFormat, 18, 20) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->Height, 18, 31) | + __gen_field(values->Width, 4, 17) | + __gen_field(values->LOD, 0, 3) | + 0; + + uint32_t dw_DepthBufferObjectControlState; + 
GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_DepthBufferObjectControlState, &values->DepthBufferObjectControlState); + dw[5] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->MinimumArrayElement, 10, 20) | + __gen_field(dw_DepthBufferObjectControlState, 0, 6) | + 0; + /* No struct member maps to dw[6]; it is explicitly written as zero so all eight dwords of the command are initialised. */ + dw[6] = + 0; + + dw[7] = + __gen_field(values->RenderTargetViewExtent, 21, 31) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + /* 3DSTATE_DRAWING_RECTANGLE: _length is 4 dwords; the _header macro uses opcode 1, sub-opcode 0 and defaults DwordLength to 2. It does not set CoreModeSelect. */ +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length_bias 0x00000002 +#define GEN8_3DSTATE_DRAWING_RECTANGLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_DRAWING_RECTANGLE_length 0x00000004 + +struct GEN8_3DSTATE_DRAWING_RECTANGLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; +#define Legacy 0 +#define Core0Enabled 1 +#define Core1Enabled 2 + uint32_t CoreModeSelect; + uint32_t DwordLength; + uint32_t ClippedDrawingRectangleYMin; + uint32_t ClippedDrawingRectangleXMin; + uint32_t ClippedDrawingRectangleYMax; + uint32_t ClippedDrawingRectangleXMax; + uint32_t DrawingRectangleOriginY; + uint32_t DrawingRectangleOriginX; +}; + /* Packs *values into dst as four dwords: dw[0] is the command header including CoreModeSelect, dw[1] holds the Y/X minimum, dw[2] the Y/X maximum and dw[3] the Y/X origin; in each pair the Y value is given the higher bit range. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_DRAWING_RECTANGLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DRAWING_RECTANGLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->CoreModeSelect, 14, 15) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ClippedDrawingRectangleYMin, 16, 31) | + __gen_field(values->ClippedDrawingRectangleXMin, 0, 15) | + 0; + + dw[2] = + __gen_field(values->ClippedDrawingRectangleYMax, 16, 31) | + 
__gen_field(values->ClippedDrawingRectangleXMax, 0, 15) | + 0; + + dw[3] = + __gen_field(values->DrawingRectangleOriginY, 16, 31) | + __gen_field(values->DrawingRectangleOriginX, 0, 15) | + 0; + +} + /* 3DSTATE_DS: _length is 9 dwords; the _header macro uses sub-opcode 29 and defaults DwordLength to 7. */ +#define GEN8_3DSTATE_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 29, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_DS_length 0x00000009 + +struct GEN8_3DSTATE_DS { + /* Unpacked field view of the command. KernelStartPointer and ScratchSpaceBasePointer are 64-bit members and each spans two dwords when packed. The #define lines interleaved with the members name values for the member that follows them. */ uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; +#define Multiple 0 +#define Single 1 + uint32_t SingleDomainPointDispatch; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool AccessesUAV; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t PatchURBEntryReadLength; + uint32_t PatchURBEntryReadOffset; + uint32_t MaximumNumberofThreads; + bool StatisticsEnable; + bool SIMD8DispatchEnable; + bool ComputeWCoordinateEnable; + bool CacheDisable; + bool FunctionEnable; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + /* Packs *values into dst as nine dwords: dw[0] is the command header, dw[1..2] the kernel start pointer, dw[3] the dispatch and exception controls, dw[4..5] the scratch space pointer combined with PerThreadScratchSpace, dw[6] the URB read configuration, dw[7] thread count and enables, dw[8] vertex URB output and clip/cull bitmasks. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + 
__gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + /* 64-bit values are assembled in a uint64_t and then split: the low 32 bits go to the first dword and the high 32 bits to the next one. */ + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleDomainPointDispatch, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->AccessesUAV, 14, 14) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + /* The scratch pointer shares its quadword with PerThreadScratchSpace, which is given the lowest bit range (0..3). */ + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->DispatchGRFStartRegisterForURBData, 20, 24) | + __gen_field(values->PatchURBEntryReadLength, 11, 17) | + __gen_field(values->PatchURBEntryReadOffset, 4, 9) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 21, 29) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->SIMD8DispatchEnable, 3, 3) | + __gen_field(values->ComputeWCoordinateEnable, 2, 2) | + __gen_field(values->CacheDisable, 1, 1) | + __gen_field(values->FunctionEnable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + __gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + /* 3DSTATE_GATHER_CONSTANT_DS: unlike the fixed-length commands in this file, the _header macro sets no DwordLength, so that field is left to the code that fills in the struct. */ +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 
55 + +#define GEN8_3DSTATE_GATHER_CONSTANT_DS_length 0x00000000 + +#define GEN8_GATHER_CONSTANT_ENTRY_length 0x00000001 + /* One-dword gather entry; presumably the element type of the variable-length tail of the 3DSTATE_GATHER_CONSTANT_xS commands, TODO confirm against the hardware documentation. */ +struct GEN8_GATHER_CONSTANT_ENTRY { + uint32_t ConstantBufferOffset; + uint32_t ChannelMask; + uint32_t BindingTableIndexOffset; +}; + /* Packs one entry into the single dword at dst from the buffer offset, channel mask and binding table index offset. The data argument is not referenced in this packer. */ +static inline void +GEN8_GATHER_CONSTANT_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_GATHER_CONSTANT_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->ConstantBufferOffset, 8, 15) | + __gen_field(values->ChannelMask, 4, 7) | + __gen_field(values->BindingTableIndexOffset, 0, 3) | + 0; + +} + /* Fixed part of 3DSTATE_GATHER_CONSTANT_DS; _length is defined as 0 and the variable-length tail is not represented in the struct. */ +struct GEN8_3DSTATE_GATHER_CONSTANT_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + /* Packs only the fixed part of the command, three dwords: dw[0] is the command header, dw[1] holds the valid mask and binding table block, dw[2] holds the gather buffer offset and the stall flag. Nothing is written past dw[2]. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_GATHER_CONSTANT_GS: the _header macro sets no DwordLength, so that field is left to the code that fills in the struct. */ +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode 
= 53 + +#define GEN8_3DSTATE_GATHER_CONSTANT_GS_length 0x00000000 + /* Fixed part of 3DSTATE_GATHER_CONSTANT_GS; _length is defined as 0 and the variable-length tail is not represented in the struct. */ +struct GEN8_3DSTATE_GATHER_CONSTANT_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + /* Packs only the fixed part of the command, three dwords: dw[0] is the command header, dw[1] holds the valid mask and binding table block, dw[2] holds the gather buffer offset and the stall flag. Nothing is written past dw[2]. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_GATHER_CONSTANT_HS: the _header macro (sub-opcode 54) sets no DwordLength, so that field is left to the code that fills in the struct. */ +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 54 + +#define GEN8_3DSTATE_GATHER_CONSTANT_HS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + /* variable length fields follow */ +}; + /* Packs only the fixed part of the command (three dwords, dw[0..2]); the variable-length tail is not written here. */ +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_HS_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_3DSTATE_GATHER_CONSTANT_HS * restrict values) +{ + /* dw[0] is the command header, dw[1] holds the valid mask and binding table block, dw[2] holds the gather buffer offset and the stall flag. The data argument is not referenced in this packer. */ uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_GATHER_CONSTANT_PS: the _header macro (sub-opcode 56) sets no DwordLength. Compared with the DS/GS/HS variants the struct has one extra member, ConstantBufferDx9Enable. */ +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 56 + +#define GEN8_3DSTATE_GATHER_CONSTANT_PS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + /* Packs only the fixed part of the command, three dwords: dw[0] is the command header, dw[1] holds the valid mask and binding table block, dw[2] holds the gather buffer offset plus the Dx9 stall and enable flags. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + 
__gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_GATHER_CONSTANT_VS: the _header macro (sub-opcode 52) sets no DwordLength, so that field is left to the code that fills in the struct. The struct includes ConstantBufferDx9Enable. */ +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 52 + +#define GEN8_3DSTATE_GATHER_CONSTANT_VS_length 0x00000000 + +struct GEN8_3DSTATE_GATHER_CONSTANT_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferValid; + uint32_t ConstantBufferBindingTableBlock; + uint32_t GatherBufferOffset; + bool ConstantBufferDx9GenerateStall; + bool ConstantBufferDx9Enable; + /* variable length fields follow */ +}; + /* Packs only the fixed part of the command, three dwords: dw[0] is the command header, dw[1] holds the valid mask and binding table block, dw[2] holds the gather buffer offset plus the Dx9 stall and enable flags. The data argument is not referenced in this packer. */ +static inline void +GEN8_3DSTATE_GATHER_CONSTANT_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_CONSTANT_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferValid, 16, 31) | + __gen_field(values->ConstantBufferBindingTableBlock, 12, 15) | + 0; + + dw[2] = + __gen_offset(values->GatherBufferOffset, 6, 22) | + __gen_field(values->ConstantBufferDx9GenerateStall, 5, 5) | + __gen_field(values->ConstantBufferDx9Enable, 4, 4) | + 0; + + /* variable length fields follow */ +} + /* 3DSTATE_GATHER_POOL_ALLOC: the _header macro uses opcode 1, sub-opcode 26 and defaults DwordLength to 2. */ +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_length_bias 0x00000002 +#define GEN8_3DSTATE_GATHER_POOL_ALLOC_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 26, \ + .DwordLength = 2 + +#define 
GEN8_3DSTATE_GATHER_POOL_ALLOC_length 0x00000004 + +struct GEN8_3DSTATE_GATHER_POOL_ALLOC { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + __gen_address_type GatherPoolBaseAddress; + bool GatherPoolEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + uint32_t GatherPoolBufferSize; +}; + +static inline void +GEN8_3DSTATE_GATHER_POOL_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GATHER_POOL_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + uint32_t dw1 = + __gen_field(values->GatherPoolEnable, 11, 11) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->GatherPoolBufferSize, 12, 31) | + 0; + +} + +#define GEN8_3DSTATE_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_GS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 8 + +#define GEN8_3DSTATE_GS_length 0x0000000a + +struct GEN8_3DSTATE_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer; + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define 
_1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool AccessesUAV; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t ExpectedVertexCount; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t OutputVertexSize; + uint32_t OutputTopology; + uint32_t VertexURBEntryReadLength; + bool IncludeVertexHandles; + uint32_t VertexURBEntryReadOffset; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t MaximumNumberofThreads; + uint32_t ControlDataHeaderSize; + uint32_t InstanceControl; + uint32_t DefaultStreamId; +#define DispatchModeSingle 0 +#define DispatchModeDualInstance 1 +#define DispatchModeDualObject 2 +#define DispatchModeSIMD8 3 + uint32_t DispatchMode; + bool StatisticsEnable; + uint32_t InvocationsIncrementValue; + bool IncludePrimitiveID; + uint32_t Hint; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool DiscardAdjacency; + bool Enable; +#define CUT 0 +#define SID 1 + uint32_t ControlDataFormat; + bool StaticOutput; + uint32_t StaticOutputVertexCount; + uint32_t VertexURBEntryOutputReadOffset; + uint32_t VertexURBEntryOutputLength; + uint32_t UserClipDistanceClipTestEnableBitmask; + uint32_t UserClipDistanceCullTestEnableBitmask; +}; + +static inline void +GEN8_3DSTATE_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + 
dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->AccessesUAV, 12, 12) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + __gen_field(values->ExpectedVertexCount, 0, 5) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->OutputVertexSize, 23, 28) | + __gen_field(values->OutputTopology, 17, 22) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->IncludeVertexHandles, 10, 10) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 0, 3) | + 0; + + dw[7] = + __gen_field(values->MaximumNumberofThreads, 24, 31) | + __gen_field(values->ControlDataHeaderSize, 20, 23) | + __gen_field(values->InstanceControl, 15, 19) | + __gen_field(values->DefaultStreamId, 13, 14) | + __gen_field(values->DispatchMode, 11, 12) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->InvocationsIncrementValue, 5, 9) | + __gen_field(values->IncludePrimitiveID, 4, 4) | + __gen_field(values->Hint, 3, 3) | + __gen_field(values->ReorderMode, 2, 2) | + __gen_field(values->DiscardAdjacency, 1, 1) | + __gen_field(values->Enable, 0, 0) | + 0; + + dw[8] = + __gen_field(values->ControlDataFormat, 31, 31) | + __gen_field(values->StaticOutput, 30, 30) | + __gen_field(values->StaticOutputVertexCount, 16, 26) | + 0; + + dw[9] = + __gen_field(values->VertexURBEntryOutputReadOffset, 21, 26) | + 
__gen_field(values->VertexURBEntryOutputLength, 16, 20) | + __gen_field(values->UserClipDistanceClipTestEnableBitmask, 8, 15) | + __gen_field(values->UserClipDistanceCullTestEnableBitmask, 0, 7) | + 0; + +} + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_HIER_DEPTH_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_HIER_DEPTH_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE HierarchicalDepthBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_HIER_DEPTH_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HIER_DEPTH_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_HierarchicalDepthBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_HierarchicalDepthBufferObjectControlState, &values->HierarchicalDepthBufferObjectControlState); + dw[1] = + __gen_field(dw_HierarchicalDepthBufferObjectControlState, 25, 31) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_HS_header \ + .CommandType = 3, \ + 
.CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 27, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_HS_length 0x00000009 + +struct GEN8_3DSTATE_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool SoftwareExceptionEnable; + bool Enable; + bool StatisticsEnable; + uint32_t MaximumNumberofThreads; + uint32_t InstanceCount; + uint64_t KernelStartPointer; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + bool SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; + bool AccessesUAV; + bool IncludeVertexHandles; + uint32_t DispatchGRFStartRegisterForURBData; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; +}; + +static inline void +GEN8_3DSTATE_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->SoftwareExceptionEnable, 12, 12) | + 0; + + dw[2] = + __gen_field(values->Enable, 31, 31) | + 
__gen_field(values->StatisticsEnable, 29, 29) | + __gen_field(values->MaximumNumberofThreads, 8, 16) | + __gen_field(values->InstanceCount, 0, 3) | + 0; + + uint64_t qw3 = + __gen_offset(values->KernelStartPointer, 6, 63) | + 0; + + dw[3] = qw3; + dw[4] = qw3 >> 32; + + uint64_t qw5 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->SingleProgramFlow, 27, 27) | + __gen_field(values->VectorMaskEnable, 26, 26) | + __gen_field(values->AccessesUAV, 25, 25) | + __gen_field(values->IncludeVertexHandles, 24, 24) | + __gen_field(values->DispatchGRFStartRegisterForURBData, 19, 23) | + __gen_field(values->VertexURBEntryReadLength, 11, 16) | + __gen_field(values->VertexURBEntryReadOffset, 4, 9) | + 0; + + dw[8] = + 0; + +} + +#define GEN8_3DSTATE_INDEX_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_INDEX_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 10, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_INDEX_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_INDEX_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define INDEX_BYTE 0 +#define INDEX_WORD 1 +#define INDEX_DWORD 2 + uint32_t IndexFormat; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + __gen_address_type BufferStartingAddress; + uint32_t BufferSize; +}; + +static inline void +GEN8_3DSTATE_INDEX_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_INDEX_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 
7) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(values->IndexFormat, 8, 9) | + __gen_field(dw_MemoryObjectControlState, 0, 6) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->BufferStartingAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->BufferSize, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_LINE_STIPPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_LINE_STIPPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 8, \ + .DwordLength = 1 + +#define GEN8_3DSTATE_LINE_STIPPLE_length 0x00000003 + +struct GEN8_3DSTATE_LINE_STIPPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ModifyEnableCurrentRepeatCounterCurrentStippleIndex; + uint32_t CurrentRepeatCounter; + uint32_t CurrentStippleIndex; + uint32_t LineStipplePattern; + float LineStippleInverseRepeatCount; + uint32_t LineStippleRepeatCount; +}; + +static inline void +GEN8_3DSTATE_LINE_STIPPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_LINE_STIPPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ModifyEnableCurrentRepeatCounterCurrentStippleIndex, 31, 31) | + __gen_field(values->CurrentRepeatCounter, 21, 29) | + __gen_field(values->CurrentStippleIndex, 16, 19) | + __gen_field(values->LineStipplePattern, 0, 15) | + 0; + + dw[2] = + __gen_field(values->LineStippleInverseRepeatCount * (1 << 16), 15, 31) | + 
__gen_field(values->LineStippleRepeatCount, 0, 8) | + 0; + +} + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length_bias 0x00000002 +#define GEN8_3DSTATE_MONOFILTER_SIZE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 17, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_MONOFILTER_SIZE_length 0x00000002 + +struct GEN8_3DSTATE_MONOFILTER_SIZE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t MonochromeFilterWidth; + uint32_t MonochromeFilterHeight; +}; + +static inline void +GEN8_3DSTATE_MONOFILTER_SIZE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MONOFILTER_SIZE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->MonochromeFilterWidth, 3, 5) | + __gen_field(values->MonochromeFilterHeight, 0, 2) | + 0; + +} + +#define GEN8_3DSTATE_MULTISAMPLE_length_bias 0x00000002 +#define GEN8_3DSTATE_MULTISAMPLE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 13, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_MULTISAMPLE_length 0x00000002 + +struct GEN8_3DSTATE_MULTISAMPLE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PixelPositionOffsetEnable; +#define CENTER 0 +#define UL_CORNER 1 + uint32_t PixelLocation; + uint32_t NumberofMultisamples; +}; + +static inline void +GEN8_3DSTATE_MULTISAMPLE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_MULTISAMPLE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + 
dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelPositionOffsetEnable, 5, 5) | + __gen_field(values->PixelLocation, 4, 4) | + __gen_field(values->NumberofMultisamples, 1, 3) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_POLY_STIPPLE_OFFSET_length 0x00000002 + +struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PolygonStippleXOffset; + uint32_t PolygonStippleYOffset; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_OFFSET_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_OFFSET * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PolygonStippleXOffset, 8, 12) | + __gen_field(values->PolygonStippleYOffset, 0, 4) | + 0; + +} + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 7, \ + .DwordLength = 31 + +#define GEN8_3DSTATE_POLY_STIPPLE_PATTERN_length 0x00000021 + +struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + 
uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PatternRow[32]; +}; + +static inline void +GEN8_3DSTATE_POLY_STIPPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_POLY_STIPPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 32; i += 1, j++) { + dw[j] = + __gen_field(values->PatternRow[i + 0], 0, 31) | + 0; + } + +} + +#define GEN8_3DSTATE_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 32, \ + .DwordLength = 10 + +#define GEN8_3DSTATE_PS_length 0x0000000c + +struct GEN8_3DSTATE_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint64_t KernelStartPointer0; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define Dmask 0 +#define Vmask 1 + uint32_t VectorMaskEnable; +#define NoSamplers 0 +#define _14Samplers 1 +#define _58Samplers 2 +#define _912Samplers 3 +#define _1316Samplers 4 + uint32_t SamplerCount; +#define FlushedtoZero 0 +#define Retained 1 + uint32_t SinglePrecisionDenormalMode; + uint32_t BindingTableEntryCount; +#define Normal 0 +#define High 1 + uint32_t ThreadDispatchPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint64_t ScratchSpaceBasePointer; + uint32_t PerThreadScratchSpace; + uint32_t MaximumNumberofThreadsPerPSD; 
+ bool PushConstantEnable; + bool RenderTargetFastClearEnable; + bool RenderTargetResolveEnable; +#define POSOFFSET_NONE 0 +#define POSOFFSET_CENTROID 2 +#define POSOFFSET_SAMPLE 3 + uint32_t PositionXYOffsetSelect; + bool _32PixelDispatchEnable; + bool _16PixelDispatchEnable; + bool _8PixelDispatchEnable; + uint32_t DispatchGRFStartRegisterForConstantSetupData0; + uint32_t DispatchGRFStartRegisterForConstantSetupData1; + uint32_t DispatchGRFStartRegisterForConstantSetupData2; + uint64_t KernelStartPointer1; + uint64_t KernelStartPointer2; +}; + +static inline void +GEN8_3DSTATE_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint64_t qw1 = + __gen_offset(values->KernelStartPointer0, 6, 63) | + 0; + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->SingleProgramFlow, 31, 31) | + __gen_field(values->VectorMaskEnable, 30, 30) | + __gen_field(values->SamplerCount, 27, 29) | + __gen_field(values->SinglePrecisionDenormalMode, 26, 26) | + __gen_field(values->BindingTableEntryCount, 18, 25) | + __gen_field(values->ThreadDispatchPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->RoundingMode, 14, 15) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + uint64_t qw4 = + __gen_offset(values->ScratchSpaceBasePointer, 10, 63) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + + dw[6] = + __gen_field(values->MaximumNumberofThreadsPerPSD, 23, 31) | + 
__gen_field(values->PushConstantEnable, 11, 11) | + __gen_field(values->RenderTargetFastClearEnable, 8, 8) | + __gen_field(values->RenderTargetResolveEnable, 6, 6) | + __gen_field(values->PositionXYOffsetSelect, 3, 4) | + __gen_field(values->_32PixelDispatchEnable, 2, 2) | + __gen_field(values->_16PixelDispatchEnable, 1, 1) | + __gen_field(values->_8PixelDispatchEnable, 0, 0) | + 0; + + dw[7] = + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData0, 16, 22) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData1, 8, 14) | + __gen_field(values->DispatchGRFStartRegisterForConstantSetupData2, 0, 6) | + 0; + + uint64_t qw8 = + __gen_offset(values->KernelStartPointer1, 6, 63) | + 0; + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint64_t qw10 = + __gen_offset(values->KernelStartPointer2, 6, 63) | + 0; + + dw[10] = qw10; + dw[11] = qw10 >> 32; + +} + +#define GEN8_3DSTATE_PS_BLEND_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_BLEND_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 77, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PS_BLEND_length 0x00000002 + +struct GEN8_3DSTATE_PS_BLEND { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool AlphaToCoverageEnable; + bool HasWriteableRT; + bool ColorBufferBlendEnable; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + bool AlphaTestEnable; + bool IndependentAlphaBlendEnable; +}; + +static inline void +GEN8_3DSTATE_PS_BLEND_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_BLEND * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + 
__gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->HasWriteableRT, 30, 30) | + __gen_field(values->ColorBufferBlendEnable, 29, 29) | + __gen_field(values->SourceAlphaBlendFactor, 24, 28) | + __gen_field(values->DestinationAlphaBlendFactor, 19, 23) | + __gen_field(values->SourceBlendFactor, 14, 18) | + __gen_field(values->DestinationBlendFactor, 9, 13) | + __gen_field(values->AlphaTestEnable, 8, 8) | + __gen_field(values->IndependentAlphaBlendEnable, 7, 7) | + 0; + +} + +#define GEN8_3DSTATE_PS_EXTRA_length_bias 0x00000002 +#define GEN8_3DSTATE_PS_EXTRA_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 79, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PS_EXTRA_length 0x00000002 + +struct GEN8_3DSTATE_PS_EXTRA { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool PixelShaderValid; + bool PixelShaderDoesnotwritetoRT; + bool oMaskPresenttoRenderTarget; + bool PixelShaderKillsPixel; +#define PSCDEPTH_OFF 0 +#define PSCDEPTH_ON 1 +#define PSCDEPTH_ON_GE 2 +#define PSCDEPTH_ON_LE 3 + uint32_t PixelShaderComputedDepthMode; + bool ForceComputedDepth; + bool PixelShaderUsesSourceDepth; + bool PixelShaderUsesSourceW; + uint32_t Removed; + bool AttributeEnable; + bool PixelShaderDisablesAlphaToCoverage; + bool PixelShaderIsPerSample; + bool PixelShaderHasUAV; + bool PixelShaderUsesInputCoverageMask; +}; + +static inline void +GEN8_3DSTATE_PS_EXTRA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PS_EXTRA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 
23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->PixelShaderValid, 31, 31) | + __gen_field(values->PixelShaderDoesnotwritetoRT, 30, 30) | + __gen_field(values->oMaskPresenttoRenderTarget, 29, 29) | + __gen_field(values->PixelShaderKillsPixel, 28, 28) | + __gen_field(values->PixelShaderComputedDepthMode, 26, 27) | + __gen_field(values->ForceComputedDepth, 25, 25) | + __gen_field(values->PixelShaderUsesSourceDepth, 24, 24) | + __gen_field(values->PixelShaderUsesSourceW, 23, 23) | + __gen_field(values->Removed, 17, 17) | + __gen_field(values->AttributeEnable, 8, 8) | + __gen_field(values->PixelShaderDisablesAlphaToCoverage, 7, 7) | + __gen_field(values->PixelShaderIsPerSample, 6, 6) | + __gen_field(values->PixelShaderHasUAV, 2, 2) | + __gen_field(values->PixelShaderUsesInputCoverageMask, 1, 1) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 20, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + 
__gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 21, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_HS * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 22, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 18, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_length 0x00000002 + +struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS { + uint32_t CommandType; + uint32_t CommandSubType; + 
uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ConstantBufferOffset; + uint32_t ConstantBufferSize; +}; + +static inline void +GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_PUSH_CONSTANT_ALLOC_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ConstantBufferOffset, 16, 20) | + __gen_field(values->ConstantBufferSize, 0, 5) | + 0; + +} + +#define GEN8_3DSTATE_RASTER_length_bias 0x00000002 +#define GEN8_3DSTATE_RASTER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 80, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_RASTER_length 0x00000005 + +struct GEN8_3DSTATE_RASTER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DX9OGL 0 +#define DX100 1 +#define DX101 2 + uint32_t APIMode; +#define Clockwise 0 +#define CounterClockwise 1 + uint32_t FrontWinding; +#define FSC_NUMRASTSAMPLES_0 0 +#define FSC_NUMRASTSAMPLES_1 1 +#define FSC_NUMRASTSAMPLES_2 2 +#define FSC_NUMRASTSAMPLES_4 3 +#define FSC_NUMRASTSAMPLES_8 4 +#define FSC_NUMRASTSAMPLES_16 5 + uint32_t ForcedSampleCount; +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_FRONT 2 +#define CULLMODE_BACK 3 + uint32_t CullMode; +#define Normal 0 +#define Force 1 + uint32_t ForceMultisampling; + bool SmoothPointEnable; + bool DXMultisampleRasterizationEnable; +#define MSRASTMODE_OFF_PIXEL 0 +#define MSRASTMODE_OFF_PATTERN 1 +#define MSRASTMODE_ON_PIXEL 2 +#define MSRASTMODE_ON_PATTERN 3 + uint32_t DXMultisampleRasterizationMode; + bool 
GlobalDepthOffsetEnableSolid; + bool GlobalDepthOffsetEnableWireframe; + bool GlobalDepthOffsetEnablePoint; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t FrontFaceFillMode; +#define RASTER_SOLID 0 +#define RASTER_WIREFRAME 1 +#define RASTER_POINT 2 + uint32_t BackFaceFillMode; + bool AntialiasingEnable; + bool ScissorRectangleEnable; + bool ViewportZClipTestEnable; + float GlobalDepthOffsetConstant; + float GlobalDepthOffsetScale; + float GlobalDepthOffsetClamp; +}; + +static inline void +GEN8_3DSTATE_RASTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_RASTER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->APIMode, 22, 23) | + __gen_field(values->FrontWinding, 21, 21) | + __gen_field(values->ForcedSampleCount, 18, 20) | + __gen_field(values->CullMode, 16, 17) | + __gen_field(values->ForceMultisampling, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->DXMultisampleRasterizationEnable, 12, 12) | + __gen_field(values->DXMultisampleRasterizationMode, 10, 11) | + __gen_field(values->GlobalDepthOffsetEnableSolid, 9, 9) | + __gen_field(values->GlobalDepthOffsetEnableWireframe, 8, 8) | + __gen_field(values->GlobalDepthOffsetEnablePoint, 7, 7) | + __gen_field(values->FrontFaceFillMode, 5, 6) | + __gen_field(values->BackFaceFillMode, 3, 4) | + __gen_field(values->AntialiasingEnable, 2, 2) | + __gen_field(values->ScissorRectangleEnable, 1, 1) | + __gen_field(values->ViewportZClipTestEnable, 0, 0) | + 0; + + dw[2] = + __gen_float(values->GlobalDepthOffsetConstant) | + 0; + + dw[3] = + __gen_float(values->GlobalDepthOffsetScale) | + 0; + + dw[4] = + 
__gen_float(values->GlobalDepthOffsetClamp) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 2 + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_length 0x00000000 + +#define GEN8_PALETTE_ENTRY_length 0x00000001 + +struct GEN8_PALETTE_ENTRY { + uint32_t Alpha; + uint32_t Red; + uint32_t Green; + uint32_t Blue; +}; + +static inline void +GEN8_PALETTE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PALETTE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->Alpha, 24, 31) | + __gen_field(values->Red, 16, 23) | + __gen_field(values->Green, 8, 15) | + __gen_field(values->Blue, 0, 7) | + 0; + +} + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 12 + +#define GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_length 0x00000000 + +struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 45, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoDSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_DS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoDSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 46, \ + 
.DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoGSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_GS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoGSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 44, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoHSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_HS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoHSSamplerState, 5, 31) | + 0; + +} + 
+#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 47, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoPSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_PS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoPSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 43, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t PointertoVSSamplerState; +}; + +static inline void +GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + 
__gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->PointertoVSSamplerState, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_MASK_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_MASK_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SAMPLE_MASK_length 0x00000002 + +struct GEN8_3DSTATE_SAMPLE_MASK { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SampleMask; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_MASK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_MASK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SampleMask, 0, 15) | + 0; + +} + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length_bias 0x00000002 +#define GEN8_3DSTATE_SAMPLE_PATTERN_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 7 + +#define GEN8_3DSTATE_SAMPLE_PATTERN_length 0x00000009 + +struct GEN8_3DSTATE_SAMPLE_PATTERN { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + float _8xSample7XOffset; + float _8xSample7YOffset; + float _8xSample6XOffset; + float _8xSample6YOffset; + float _8xSample5XOffset; + float _8xSample5YOffset; + float _8xSample4XOffset; + float _8xSample4YOffset; + float _8xSample3XOffset; + float _8xSample3YOffset; + float _8xSample2XOffset; + 
float _8xSample2YOffset; + float _8xSample1XOffset; + float _8xSample1YOffset; + float _8xSample0XOffset; + float _8xSample0YOffset; + float _4xSample3XOffset; + float _4xSample3YOffset; + float _4xSample2XOffset; + float _4xSample2YOffset; + float _4xSample1XOffset; + float _4xSample1YOffset; + float _4xSample0XOffset; + float _4xSample0YOffset; + float _1xSample0XOffset; + float _1xSample0YOffset; + float _2xSample1XOffset; + float _2xSample1YOffset; + float _2xSample0XOffset; + float _2xSample0YOffset; +}; + +static inline void +GEN8_3DSTATE_SAMPLE_PATTERN_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SAMPLE_PATTERN * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 4; i += 1, j++) { + dw[j] = + 0; + } + + dw[5] = + __gen_field(values->_8xSample7XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample7YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample6XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample6YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample5XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample5YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample4XOffset * (1 << 4), 4, 7) | + __gen_field(values->_8xSample4YOffset * (1 << 4), 0, 3) | + 0; + + dw[6] = + __gen_field(values->_8xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_8xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_8xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_8xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_8xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_8xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_8xSample0XOffset * (1 << 4), 4, 
7) | + __gen_field(values->_8xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[7] = + __gen_field(values->_4xSample3XOffset * (1 << 4), 28, 31) | + __gen_field(values->_4xSample3YOffset * (1 << 4), 24, 27) | + __gen_field(values->_4xSample2XOffset * (1 << 4), 20, 23) | + __gen_field(values->_4xSample2YOffset * (1 << 4), 16, 19) | + __gen_field(values->_4xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_4xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_4xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_4xSample0YOffset * (1 << 4), 0, 3) | + 0; + + dw[8] = + __gen_field(values->_1xSample0XOffset * (1 << 4), 20, 23) | + __gen_field(values->_1xSample0YOffset * (1 << 4), 16, 19) | + __gen_field(values->_2xSample1XOffset * (1 << 4), 12, 15) | + __gen_field(values->_2xSample1YOffset * (1 << 4), 8, 11) | + __gen_field(values->_2xSample0XOffset * (1 << 4), 4, 7) | + __gen_field(values->_2xSample0YOffset * (1 << 4), 0, 3) | + 0; + +} + +#define GEN8_3DSTATE_SBE_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 31, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_SBE_length 0x00000004 + +struct GEN8_3DSTATE_SBE { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool ForceVertexURBEntryReadLength; + bool ForceVertexURBEntryReadOffset; + uint32_t NumberofSFOutputAttributes; + bool AttributeSwizzleEnable; +#define UPPERLEFT 0 +#define LOWERLEFT 1 + uint32_t PointSpriteTextureCoordinateOrigin; + bool PrimitiveIDOverrideComponentW; + bool PrimitiveIDOverrideComponentZ; + bool PrimitiveIDOverrideComponentY; + bool PrimitiveIDOverrideComponentX; + uint32_t VertexURBEntryReadLength; + uint32_t VertexURBEntryReadOffset; + uint32_t PrimitiveIDOverrideAttributeSelect; + uint32_t PointSpriteTextureCoordinateEnable; + uint32_t ConstantInterpolationEnable; +}; + +static 
inline void +GEN8_3DSTATE_SBE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->ForceVertexURBEntryReadLength, 29, 29) | + __gen_field(values->ForceVertexURBEntryReadOffset, 28, 28) | + __gen_field(values->NumberofSFOutputAttributes, 22, 27) | + __gen_field(values->AttributeSwizzleEnable, 21, 21) | + __gen_field(values->PointSpriteTextureCoordinateOrigin, 20, 20) | + __gen_field(values->PrimitiveIDOverrideComponentW, 19, 19) | + __gen_field(values->PrimitiveIDOverrideComponentZ, 18, 18) | + __gen_field(values->PrimitiveIDOverrideComponentY, 17, 17) | + __gen_field(values->PrimitiveIDOverrideComponentX, 16, 16) | + __gen_field(values->VertexURBEntryReadLength, 11, 15) | + __gen_field(values->VertexURBEntryReadOffset, 5, 10) | + __gen_field(values->PrimitiveIDOverrideAttributeSelect, 0, 4) | + 0; + + dw[2] = + __gen_field(values->PointSpriteTextureCoordinateEnable, 0, 31) | + 0; + + dw[3] = + __gen_field(values->ConstantInterpolationEnable, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SBE_SWIZ_length_bias 0x00000002 +#define GEN8_3DSTATE_SBE_SWIZ_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 81, \ + .DwordLength = 9 + +#define GEN8_3DSTATE_SBE_SWIZ_length 0x0000000b + +#define GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_length 0x00000001 + +struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL { + bool ComponentOverrideW; + bool ComponentOverrideZ; + bool ComponentOverrideY; + bool ComponentOverrideX; + uint32_t SwizzleControlMode; +#define CONST_0000 0 +#define CONST_0001_FLOAT 1 +#define CONST_1111_FLOAT 2 +#define PRIM_ID 3 + uint32_t ConstantSource; 
+#define INPUTATTR 0 +#define INPUTATTR_FACING 1 +#define INPUTATTR_W 2 +#define INPUTATTR_FACING_W 3 + uint32_t SwizzleSelect; + uint32_t SourceAttribute; +}; + +static inline void +GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ComponentOverrideW, 15, 15) | + __gen_field(values->ComponentOverrideZ, 14, 14) | + __gen_field(values->ComponentOverrideY, 13, 13) | + __gen_field(values->ComponentOverrideX, 12, 12) | + __gen_field(values->SwizzleControlMode, 11, 11) | + __gen_field(values->ConstantSource, 9, 10) | + __gen_field(values->SwizzleSelect, 6, 7) | + __gen_field(values->SourceAttribute, 0, 4) | + 0; + +} + +struct GEN8_3DSTATE_SBE_SWIZ { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + struct GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL Attribute[16]; + uint32_t AttributeWrapShortestEnables[16]; +}; + +static inline void +GEN8_3DSTATE_SBE_SWIZ_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SBE_SWIZ * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + for (uint32_t i = 0, j = 1; i < 16; i += 2, j++) { + uint32_t dw_Attribute0; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute0, &values->Attribute[i + 0]); + uint32_t dw_Attribute1; + GEN8_SF_OUTPUT_ATTRIBUTE_DETAIL_pack(data, &dw_Attribute1, &values->Attribute[i + 1]); + dw[j] = + __gen_field(dw_Attribute0, 0, 15) | + __gen_field(dw_Attribute1, 16, 31) | + 0; + } + + for (uint32_t i = 0, j = 9; i < 16; i += 8, j++) { + dw[j] = + 
__gen_field(values->AttributeWrapShortestEnables[i + 0], 0, 3) | + __gen_field(values->AttributeWrapShortestEnables[i + 1], 4, 7) | + __gen_field(values->AttributeWrapShortestEnables[i + 2], 8, 11) | + __gen_field(values->AttributeWrapShortestEnables[i + 3], 12, 15) | + __gen_field(values->AttributeWrapShortestEnables[i + 4], 16, 19) | + __gen_field(values->AttributeWrapShortestEnables[i + 5], 20, 23) | + __gen_field(values->AttributeWrapShortestEnables[i + 6], 24, 27) | + __gen_field(values->AttributeWrapShortestEnables[i + 7], 28, 31) | + 0; + } + +} + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length_bias 0x00000002 +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_header\ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 15, \ + .DwordLength = 0 + +#define GEN8_3DSTATE_SCISSOR_STATE_POINTERS_length 0x00000002 + +struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t ScissorRectPointer; +}; + +static inline void +GEN8_3DSTATE_SCISSOR_STATE_POINTERS_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SCISSOR_STATE_POINTERS * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->ScissorRectPointer, 5, 31) | + 0; + +} + +#define GEN8_3DSTATE_SF_length_bias 0x00000002 +#define GEN8_3DSTATE_SF_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 19, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_SF_length 0x00000004 + +struct GEN8_3DSTATE_SF { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t 
_3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool LegacyGlobalDepthBiasEnable; + bool StatisticsEnable; + bool ViewportTransformEnable; + float LineWidth; +#define _05pixels 0 +#define _10pixels 1 +#define _20pixels 2 +#define _40pixels 3 + uint32_t LineEndCapAntialiasingRegionWidth; + bool LastPixelEnable; + uint32_t TriangleStripListProvokingVertexSelect; + uint32_t LineStripListProvokingVertexSelect; + uint32_t TriangleFanProvokingVertexSelect; +#define AALINEDISTANCE_TRUE 1 + uint32_t AALineDistanceMode; + bool SmoothPointEnable; + uint32_t VertexSubPixelPrecisionSelect; +#define Vertex 0 +#define State 1 + uint32_t PointWidthSource; + float PointWidth; +}; + +static inline void +GEN8_3DSTATE_SF_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SF * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->LegacyGlobalDepthBiasEnable, 11, 11) | + __gen_field(values->StatisticsEnable, 10, 10) | + __gen_field(values->ViewportTransformEnable, 1, 1) | + 0; + + dw[2] = + __gen_field(values->LineWidth * (1 << 7), 18, 27) | + __gen_field(values->LineEndCapAntialiasingRegionWidth, 16, 17) | + 0; + + dw[3] = + __gen_field(values->LastPixelEnable, 31, 31) | + __gen_field(values->TriangleStripListProvokingVertexSelect, 29, 30) | + __gen_field(values->LineStripListProvokingVertexSelect, 27, 28) | + __gen_field(values->TriangleFanProvokingVertexSelect, 25, 26) | + __gen_field(values->AALineDistanceMode, 14, 14) | + __gen_field(values->SmoothPointEnable, 13, 13) | + __gen_field(values->VertexSubPixelPrecisionSelect, 12, 12) | + __gen_field(values->PointWidthSource, 11, 11) | + __gen_field(values->PointWidth * (1 << 3), 0, 10) | 
+ 0; + +} + +#define GEN8_3DSTATE_SO_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 24, \ + .DwordLength = 6 + +#define GEN8_3DSTATE_SO_BUFFER_length 0x00000008 + +struct GEN8_3DSTATE_SO_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + bool SOBufferEnable; + uint32_t SOBufferIndex; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE SOBufferObjectControlState; + bool StreamOffsetWriteEnable; + bool StreamOutputBufferOffsetAddressEnable; + __gen_address_type SurfaceBaseAddress; + uint32_t SurfaceSize; + __gen_address_type StreamOutputBufferOffsetAddress; + uint32_t StreamOffset; +}; + +static inline void +GEN8_3DSTATE_SO_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_SOBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_SOBufferObjectControlState, &values->SOBufferObjectControlState); + dw[1] = + __gen_field(values->SOBufferEnable, 31, 31) | + __gen_field(values->SOBufferIndex, 29, 30) | + __gen_field(dw_SOBufferObjectControlState, 22, 28) | + __gen_field(values->StreamOffsetWriteEnable, 21, 21) | + __gen_field(values->StreamOutputBufferOffsetAddressEnable, 20, 20) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceSize, 0, 29) | + 0; + + uint32_t dw5 = + 0; + + uint64_t qw5 = + __gen_combine_address(data, 
&dw[5], values->StreamOutputBufferOffsetAddress, dw5); + + dw[5] = qw5; + dw[6] = qw5 >> 32; + + dw[7] = + __gen_field(values->StreamOffset, 0, 31) | + 0; + +} + +#define GEN8_3DSTATE_SO_DECL_LIST_length_bias 0x00000002 +#define GEN8_3DSTATE_SO_DECL_LIST_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 1, \ + ._3DCommandSubOpcode = 23 + +#define GEN8_3DSTATE_SO_DECL_LIST_length 0x00000000 + +#define GEN8_SO_DECL_ENTRY_length 0x00000002 + +#define GEN8_SO_DECL_length 0x00000001 + +struct GEN8_SO_DECL { + uint32_t OutputBufferSlot; + uint32_t HoleFlag; + uint32_t RegisterIndex; + uint32_t ComponentMask; +}; + +static inline void +GEN8_SO_DECL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->OutputBufferSlot, 12, 13) | + __gen_field(values->HoleFlag, 11, 11) | + __gen_field(values->RegisterIndex, 4, 9) | + __gen_field(values->ComponentMask, 0, 3) | + 0; + +} + +struct GEN8_SO_DECL_ENTRY { + struct GEN8_SO_DECL Stream3Decl; + struct GEN8_SO_DECL Stream2Decl; + struct GEN8_SO_DECL Stream1Decl; + struct GEN8_SO_DECL Stream0Decl; +}; + +static inline void +GEN8_SO_DECL_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SO_DECL_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint32_t dw_Stream3Decl; + GEN8_SO_DECL_pack(data, &dw_Stream3Decl, &values->Stream3Decl); + uint32_t dw_Stream2Decl; + GEN8_SO_DECL_pack(data, &dw_Stream2Decl, &values->Stream2Decl); + uint32_t dw_Stream1Decl; + GEN8_SO_DECL_pack(data, &dw_Stream1Decl, &values->Stream1Decl); + uint32_t dw_Stream0Decl; + GEN8_SO_DECL_pack(data, &dw_Stream0Decl, &values->Stream0Decl); + uint64_t qw0 = + __gen_field(dw_Stream3Decl, 48, 63) | + __gen_field(dw_Stream2Decl, 32, 47) | + __gen_field(dw_Stream1Decl, 16, 31) | + __gen_field(dw_Stream0Decl, 0, 15) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct 
GEN8_3DSTATE_SO_DECL_LIST { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StreamtoBufferSelects3; + uint32_t StreamtoBufferSelects2; + uint32_t StreamtoBufferSelects1; + uint32_t StreamtoBufferSelects0; + uint32_t NumEntries3; + uint32_t NumEntries2; + uint32_t NumEntries1; + uint32_t NumEntries0; + /* variable length fields follow */ +}; + +static inline void +GEN8_3DSTATE_SO_DECL_LIST_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_SO_DECL_LIST * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 8) | + 0; + + dw[1] = + __gen_field(values->StreamtoBufferSelects3, 12, 15) | + __gen_field(values->StreamtoBufferSelects2, 8, 11) | + __gen_field(values->StreamtoBufferSelects1, 4, 7) | + __gen_field(values->StreamtoBufferSelects0, 0, 3) | + 0; + + dw[2] = + __gen_field(values->NumEntries3, 24, 31) | + __gen_field(values->NumEntries2, 16, 23) | + __gen_field(values->NumEntries1, 8, 15) | + __gen_field(values->NumEntries0, 0, 7) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_3DSTATE_STENCIL_BUFFER_length_bias 0x00000002 +#define GEN8_3DSTATE_STENCIL_BUFFER_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 6, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_STENCIL_BUFFER_length 0x00000005 + +struct GEN8_3DSTATE_STENCIL_BUFFER { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t StencilBufferEnable; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE StencilBufferObjectControlState; + uint32_t SurfacePitch; + __gen_address_type 
SurfaceBaseAddress; + uint32_t SurfaceQPitch; +}; + +static inline void +GEN8_3DSTATE_STENCIL_BUFFER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STENCIL_BUFFER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw_StencilBufferObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_StencilBufferObjectControlState, &values->StencilBufferObjectControlState); + dw[1] = + __gen_field(values->StencilBufferEnable, 31, 31) | + __gen_field(dw_StencilBufferObjectControlState, 22, 28) | + __gen_field(values->SurfacePitch, 0, 16) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + dw[4] = + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + +} + +#define GEN8_3DSTATE_STREAMOUT_length_bias 0x00000002 +#define GEN8_3DSTATE_STREAMOUT_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 30, \ + .DwordLength = 3 + +#define GEN8_3DSTATE_STREAMOUT_length 0x00000005 + +struct GEN8_3DSTATE_STREAMOUT { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; + uint32_t SOFunctionEnable; + uint32_t APIRenderingDisable; + uint32_t RenderStreamSelect; +#define LEADING 0 +#define TRAILING 1 + uint32_t ReorderMode; + bool SOStatisticsEnable; +#define Normal 0 +#define Resreved 1 +#define Force_Off 2 +#define Force_on 3 + uint32_t ForceRendering; + uint32_t Stream3VertexReadOffset; + uint32_t Stream3VertexReadLength; + uint32_t Stream2VertexReadOffset; + uint32_t Stream2VertexReadLength; + uint32_t Stream1VertexReadOffset; + 
uint32_t Stream1VertexReadLength; + uint32_t Stream0VertexReadOffset; + uint32_t Stream0VertexReadLength; + uint32_t Buffer1SurfacePitch; + uint32_t Buffer0SurfacePitch; + uint32_t Buffer3SurfacePitch; + uint32_t Buffer2SurfacePitch; +}; + +static inline void +GEN8_3DSTATE_STREAMOUT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_3DSTATE_STREAMOUT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->SOFunctionEnable, 31, 31) | + __gen_field(values->APIRenderingDisable, 30, 30) | + __gen_field(values->RenderStreamSelect, 27, 28) | + __gen_field(values->ReorderMode, 26, 26) | + __gen_field(values->SOStatisticsEnable, 25, 25) | + __gen_field(values->ForceRendering, 23, 24) | + 0; + + dw[2] = + __gen_field(values->Stream3VertexReadOffset, 29, 29) | + __gen_field(values->Stream3VertexReadLength, 24, 28) | + __gen_field(values->Stream2VertexReadOffset, 21, 21) | + __gen_field(values->Stream2VertexReadLength, 16, 20) | + __gen_field(values->Stream1VertexReadOffset, 13, 13) | + __gen_field(values->Stream1VertexReadLength, 8, 12) | + __gen_field(values->Stream0VertexReadOffset, 5, 5) | + __gen_field(values->Stream0VertexReadLength, 0, 4) | + 0; + + dw[3] = + __gen_field(values->Buffer1SurfacePitch, 16, 27) | + __gen_field(values->Buffer0SurfacePitch, 0, 11) | + 0; + + dw[4] = + __gen_field(values->Buffer3SurfacePitch, 16, 27) | + __gen_field(values->Buffer2SurfacePitch, 0, 11) | + 0; + +} + +#define GEN8_3DSTATE_TE_length_bias 0x00000002 +#define GEN8_3DSTATE_TE_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 0, \ + ._3DCommandSubOpcode = 28, \ + .DwordLength = 2 + +#define GEN8_3DSTATE_TE_length 0x00000004 + +struct 
GEN8_3DSTATE_TE {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+#define INTEGER 0
+#define ODD_FRACTIONAL 1
+#define EVEN_FRACTIONAL 2
+   uint32_t Partitioning;
+#define POINT 0
+#define OUTPUT_LINE 1
+#define OUTPUT_TRI_CW 2
+#define OUTPUT_TRI_CCW 3
+   uint32_t OutputTopology;
+#define QUAD 0
+#define TRI 1
+#define ISOLINE 2
+   uint32_t TEDomain;
+#define HW_TESS 0
+#define SW_TESS 1
+   uint32_t TEMode;
+   bool TEEnable;
+   float MaximumTessellationFactorOdd;
+   float MaximumTessellationFactorNotOdd;
+};
+/* Serializes *values into four dwords at dst. dw[2] and dw[3] each hold one
+ * float member converted by __gen_float() (presumably a raw bit copy of the
+ * float -- helper is defined elsewhere). `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_TE_pack(__gen_user_data *data, void * restrict dst,
+                     const struct GEN8_3DSTATE_TE * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->Partitioning, 12, 13) |
+      __gen_field(values->OutputTopology, 8, 9) |
+      __gen_field(values->TEDomain, 4, 5) |
+      __gen_field(values->TEMode, 1, 2) |
+      __gen_field(values->TEEnable, 0, 0) |
+      0;
+
+   dw[2] =
+      __gen_float(values->MaximumTessellationFactorOdd) |
+      0;
+
+   dw[3] =
+      __gen_float(values->MaximumTessellationFactorNotOdd) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_URB_DS_length_bias 0x00000002
+#define GEN8_3DSTATE_URB_DS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 50, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_URB_DS_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_URB_DS_pack(). */
+struct GEN8_3DSTATE_URB_DS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t DSURBStartingAddress;
+   uint32_t DSURBEntryAllocationSize;
+   uint32_t DSNumberofURBEntries;
+};
+/* Serializes *values into two dwords at dst: header in dw[0], the three
+ * DS URB members in dw[1]. `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_URB_DS_pack(__gen_user_data *data, void * restrict dst,
+                         const struct GEN8_3DSTATE_URB_DS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->DSURBStartingAddress, 25, 31) |
+      __gen_field(values->DSURBEntryAllocationSize, 16, 24) |
+      __gen_field(values->DSNumberofURBEntries, 0, 15) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_URB_GS_length_bias 0x00000002
+#define GEN8_3DSTATE_URB_GS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 51, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_URB_GS_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_URB_GS_pack(); same member
+ * layout as the URB_DS struct with a GS prefix on the URB members. */
+struct GEN8_3DSTATE_URB_GS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t GSURBStartingAddress;
+   uint32_t GSURBEntryAllocationSize;
+   uint32_t GSNumberofURBEntries;
+};
+/* Serializes *values into two dwords at dst: header in dw[0], the three
+ * GS URB members in dw[1]. `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_URB_GS_pack(__gen_user_data *data, void * restrict dst,
+                         const struct GEN8_3DSTATE_URB_GS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->GSURBStartingAddress, 25, 31) |
+      __gen_field(values->GSURBEntryAllocationSize, 16, 24) |
+      __gen_field(values->GSNumberofURBEntries, 0, 15) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_URB_HS_length_bias 0x00000002
+#define GEN8_3DSTATE_URB_HS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 49, \
+
.DwordLength = 0
+
+#define GEN8_3DSTATE_URB_HS_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_URB_HS_pack(). */
+struct GEN8_3DSTATE_URB_HS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t HSURBStartingAddress;
+   uint32_t HSURBEntryAllocationSize;
+   uint32_t HSNumberofURBEntries;
+};
+/* Serializes *values into two dwords at dst: header in dw[0], the three
+ * HS URB members in dw[1]. `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_URB_HS_pack(__gen_user_data *data, void * restrict dst,
+                         const struct GEN8_3DSTATE_URB_HS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->HSURBStartingAddress, 25, 31) |
+      __gen_field(values->HSURBEntryAllocationSize, 16, 24) |
+      __gen_field(values->HSNumberofURBEntries, 0, 15) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VERTEX_BUFFERS_length_bias 0x00000002
+#define GEN8_3DSTATE_VERTEX_BUFFERS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 8
+
+#define GEN8_3DSTATE_VERTEX_BUFFERS_length 0x00000000
+
+#define GEN8_VERTEX_BUFFER_STATE_length 0x00000004
+/* Unpacked field values read by GEN8_VERTEX_BUFFER_STATE_pack(); embeds a
+ * nested GEN8_MEMORY_OBJECT_CONTROL_STATE and one address-typed member. */
+struct GEN8_VERTEX_BUFFER_STATE {
+   uint32_t VertexBufferIndex;
+   struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState;
+   uint32_t AddressModifyEnable;
+   bool NullVertexBuffer;
+   uint32_t BufferPitch;
+   __gen_address_type BufferStartingAddress;
+   uint32_t BufferSize;
+};
+/* Serializes *values into four dwords at dst. There is no command header:
+ * dw[0] starts directly with VertexBufferIndex. The nested control state is
+ * packed into a temporary dword before being inserted into dw[0]. */
+static inline void
+GEN8_VERTEX_BUFFER_STATE_pack(__gen_user_data *data, void * restrict dst,
+                              const struct GEN8_VERTEX_BUFFER_STATE * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   uint32_t dw_MemoryObjectControlState;
+   GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState);
+   dw[0] =
+      __gen_field(values->VertexBufferIndex, 26, 31)
|
+      __gen_field(dw_MemoryObjectControlState, 16, 22) |
+      __gen_field(values->AddressModifyEnable, 14, 14) |
+      __gen_field(values->NullVertexBuffer, 13, 13) |
+      __gen_field(values->BufferPitch, 0, 11) |
+      0;
+
+   uint32_t dw1 =
+      0;
+
+   uint64_t qw1 =
+      __gen_combine_address(data, &dw[1], values->BufferStartingAddress, dw1);
+
+   dw[1] = qw1;
+   dw[2] = qw1 >> 32;
+
+   dw[3] =
+      __gen_field(values->BufferSize, 0, 31) |
+      0;
+
+}
+/* Header-only description of the command: just the five header members.
+ * The trailing comment marks where variable-length data follows. */
+struct GEN8_3DSTATE_VERTEX_BUFFERS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   /* variable length fields follow */
+};
+/* Writes only the header dword dw[0]; nothing past dw[0] is emitted here.
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VERTEX_BUFFERS_pack(__gen_user_data *data, void * restrict dst,
+                                 const struct GEN8_3DSTATE_VERTEX_BUFFERS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   /* variable length fields follow */
+}
+
+#define GEN8_3DSTATE_VERTEX_ELEMENTS_length_bias 0x00000002
+#define GEN8_3DSTATE_VERTEX_ELEMENTS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 9
+
+#define GEN8_3DSTATE_VERTEX_ELEMENTS_length 0x00000000
+
+#define GEN8_VERTEX_ELEMENT_STATE_length 0x00000002
+/* Unpacked field values read by GEN8_VERTEX_ELEMENT_STATE_pack(). */
+struct GEN8_VERTEX_ELEMENT_STATE {
+   uint32_t VertexBufferIndex;
+   bool Valid;
+   uint32_t SourceElementFormat;
+   bool EdgeFlagEnable;
+   uint32_t SourceElementOffset;
+   uint32_t Component0Control;
+   uint32_t Component1Control;
+   uint32_t Component2Control;
+   uint32_t Component3Control;
+};
+/* Serializes *values into two dwords at dst (no command header): element
+ * source description in dw[0], the four component controls in dw[1].
+ * `data` is not referenced. */
+static inline void
+GEN8_VERTEX_ELEMENT_STATE_pack(__gen_user_data *data, void * restrict dst,
+                               const struct GEN8_VERTEX_ELEMENT_STATE * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+
__gen_field(values->VertexBufferIndex, 26, 31) |
+      __gen_field(values->Valid, 25, 25) |
+      __gen_field(values->SourceElementFormat, 16, 24) |
+      __gen_field(values->EdgeFlagEnable, 15, 15) |
+      __gen_field(values->SourceElementOffset, 0, 11) |
+      0;
+
+   dw[1] =
+      __gen_field(values->Component0Control, 28, 30) |
+      __gen_field(values->Component1Control, 24, 26) |
+      __gen_field(values->Component2Control, 20, 22) |
+      __gen_field(values->Component3Control, 16, 18) |
+      0;
+
+}
+/* Header-only description of the command: just the five header members.
+ * The trailing comment marks where variable-length data follows. */
+struct GEN8_3DSTATE_VERTEX_ELEMENTS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   /* variable length fields follow */
+};
+/* Writes only the header dword dw[0]; nothing past dw[0] is emitted here.
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VERTEX_ELEMENTS_pack(__gen_user_data *data, void * restrict dst,
+                                  const struct GEN8_3DSTATE_VERTEX_ELEMENTS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   /* variable length fields follow */
+}
+
+#define GEN8_3DSTATE_VF_length_bias 0x00000002
+#define GEN8_3DSTATE_VF_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 12, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_VF_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_VF_pack(). Note that
+ * IndexedDrawCutIndexEnable is declared before DwordLength; both are packed
+ * into the header dword dw[0]. */
+struct GEN8_3DSTATE_VF {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   bool IndexedDrawCutIndexEnable;
+   uint32_t DwordLength;
+   uint32_t CutIndex;
+};
+/* Serializes *values into two dwords at dst: header plus the cut-index
+ * enable flag in dw[0], CutIndex with the (0, 31) range in dw[1].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VF_pack(__gen_user_data *data, void * restrict dst,
+                     const struct GEN8_3DSTATE_VF * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+
__gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->IndexedDrawCutIndexEnable, 8, 8) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->CutIndex, 0, 31) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VF_INSTANCING_length_bias 0x00000002
+#define GEN8_3DSTATE_VF_INSTANCING_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 73, \
+   .DwordLength = 1
+
+#define GEN8_3DSTATE_VF_INSTANCING_length 0x00000003
+/* Unpacked field values read by GEN8_3DSTATE_VF_INSTANCING_pack(). */
+struct GEN8_3DSTATE_VF_INSTANCING {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   bool InstancingEnable;
+   uint32_t VertexElementIndex;
+   uint32_t InstanceDataStepRate;
+};
+/* Serializes *values into three dwords at dst: header in dw[0], enable flag
+ * and element index in dw[1], step rate in dw[2]. `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VF_INSTANCING_pack(__gen_user_data *data, void * restrict dst,
+                                const struct GEN8_3DSTATE_VF_INSTANCING * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->InstancingEnable, 8, 8) |
+      __gen_field(values->VertexElementIndex, 0, 5) |
+      0;
+
+   dw[2] =
+      __gen_field(values->InstanceDataStepRate, 0, 31) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VF_SGVS_length_bias 0x00000002
+#define GEN8_3DSTATE_VF_SGVS_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 74, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_VF_SGVS_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_VF_SGVS_pack(). The COMP_n
+ * macros are defined twice with identical values, once above each
+ * *ComponentNumber member. */
+struct GEN8_3DSTATE_VF_SGVS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   bool InstanceIDEnable;
+#define COMP_0 0
+#define COMP_1 1
+#define COMP_2 2
+#define COMP_3 3
+   uint32_t InstanceIDComponentNumber;
+   uint32_t InstanceIDElementOffset;
+   bool VertexIDEnable;
+#define COMP_0 0
+#define COMP_1 1
+#define COMP_2 2
+#define COMP_3 3
+   uint32_t VertexIDComponentNumber;
+   uint32_t VertexIDElementOffset;
+};
+/* Serializes *values into two dwords at dst: header in dw[0]; dw[1] holds the
+ * InstanceID enable/component/offset triple followed by the VertexID triple.
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VF_SGVS_pack(__gen_user_data *data, void * restrict dst,
+                          const struct GEN8_3DSTATE_VF_SGVS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->InstanceIDEnable, 31, 31) |
+      __gen_field(values->InstanceIDComponentNumber, 29, 30) |
+      __gen_field(values->InstanceIDElementOffset, 16, 21) |
+      __gen_field(values->VertexIDEnable, 15, 15) |
+      __gen_field(values->VertexIDComponentNumber, 13, 14) |
+      __gen_field(values->VertexIDElementOffset, 0, 5) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VF_STATISTICS_length_bias 0x00000001
+#define GEN8_3DSTATE_VF_STATISTICS_header \
+   .CommandType = 3, \
+   .CommandSubType = 1, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 11
+
+#define GEN8_3DSTATE_VF_STATISTICS_length 0x00000001
+/* Unpacked field values read by GEN8_3DSTATE_VF_STATISTICS_pack(). Unlike
+ * the neighbouring command structs this one has no DwordLength member. */
+struct GEN8_3DSTATE_VF_STATISTICS {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   bool StatisticsEnable;
+};
+/* Serializes *values into the single dword dw[0]; StatisticsEnable uses the
+ * (0, 0) range of that same dword. `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VF_STATISTICS_pack(__gen_user_data *data, void * restrict dst,
+                                const struct GEN8_3DSTATE_VF_STATISTICS * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->StatisticsEnable, 0, 0) |
+      0;
+
+}
+
+#define
GEN8_3DSTATE_VF_TOPOLOGY_length_bias 0x00000002
+#define GEN8_3DSTATE_VF_TOPOLOGY_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 75, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_VF_TOPOLOGY_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_VF_TOPOLOGY_pack(). */
+struct GEN8_3DSTATE_VF_TOPOLOGY {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t PrimitiveTopologyType;
+};
+/* Serializes *values into two dwords at dst: header in dw[0],
+ * PrimitiveTopologyType with the (0, 5) range in dw[1].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VF_TOPOLOGY_pack(__gen_user_data *data, void * restrict dst,
+                              const struct GEN8_3DSTATE_VF_TOPOLOGY * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->PrimitiveTopologyType, 0, 5) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length_bias 0x00000002
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 35, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(). */
+struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t CCViewportPointer;
+};
+/* Serializes *values into two dwords at dst: header in dw[0]; dw[1] takes
+ * CCViewportPointer through __gen_offset() rather than __gen_field()
+ * (helper defined elsewhere). `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC_pack(__gen_user_data *data, void * restrict dst,
+                                             const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23)
|
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->CCViewportPointer, 5, 31) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length_bias 0x00000002
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_header\
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 33, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_length 0x00000002
+/* Unpacked field values read by
+ * GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(). */
+struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t SFClipViewportPointer;
+};
+/* Serializes *values into two dwords at dst: header in dw[0]; dw[1] takes
+ * SFClipViewportPointer through __gen_offset() with the (6, 31) range.
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP_pack(__gen_user_data *data, void * restrict dst,
+                                                  const struct GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_offset(values->SFClipViewportPointer, 6, 31) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_WM_length_bias 0x00000002
+#define GEN8_3DSTATE_WM_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 20, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_WM_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_WM_pack(). Both NORMAL and
+ * Normal are defined below as distinct macros (identifiers are
+ * case-sensitive), each with value 0. */
+struct GEN8_3DSTATE_WM {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   bool StatisticsEnable;
+   bool LegacyDepthBufferClearEnable;
+   bool LegacyDepthBufferResolveEnable;
+   bool LegacyHierarchicalDepthBufferResolveEnable;
+   bool LegacyDiamondLineRasterization;
+#define NORMAL 0
+#define PSEXEC 1
+#define PREPS 2
+   uint32_t EarlyDepthStencilControl;
+#define Normal 0
+#define
ForceOff 1
+#define ForceON 2
+   uint32_t ForceThreadDispatchEnable;
+#define INTERP_PIXEL 0
+#define INTERP_CENTROID 2
+#define INTERP_SAMPLE 3
+   uint32_t PositionZWInterpolationMode;
+   uint32_t BarycentricInterpolationMode;
+#define _05pixels 0
+#define _10pixels 1
+#define _20pixels 2
+#define _40pixels 3
+   uint32_t LineEndCapAntialiasingRegionWidth;
+#define _05pixels 0
+#define _10pixels 1
+#define _20pixels 2
+#define _40pixels 3
+   uint32_t LineAntialiasingRegionWidth;
+   bool PolygonStippleEnable;
+   bool LineStippleEnable;
+#define RASTRULE_UPPER_LEFT 0
+#define RASTRULE_UPPER_RIGHT 1
+   uint32_t PointRasterizationRule;
+#define Normal 0
+#define ForceOff 1
+#define ForceON 2
+   uint32_t ForceKillPixelEnable;
+};
+/* Serializes *values into two dwords at dst: header in dw[0] and every
+ * remaining struct member OR-ed together into dw[1].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_WM_pack(__gen_user_data *data, void * restrict dst,
+                     const struct GEN8_3DSTATE_WM * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->StatisticsEnable, 31, 31) |
+      __gen_field(values->LegacyDepthBufferClearEnable, 30, 30) |
+      __gen_field(values->LegacyDepthBufferResolveEnable, 28, 28) |
+      __gen_field(values->LegacyHierarchicalDepthBufferResolveEnable, 27, 27) |
+      __gen_field(values->LegacyDiamondLineRasterization, 26, 26) |
+      __gen_field(values->EarlyDepthStencilControl, 21, 22) |
+      __gen_field(values->ForceThreadDispatchEnable, 19, 20) |
+      __gen_field(values->PositionZWInterpolationMode, 17, 18) |
+      __gen_field(values->BarycentricInterpolationMode, 11, 16) |
+      __gen_field(values->LineEndCapAntialiasingRegionWidth, 8, 9) |
+      __gen_field(values->LineAntialiasingRegionWidth, 6, 7) |
+      __gen_field(values->PolygonStippleEnable, 4, 4) |
+      __gen_field(values->LineStippleEnable, 3, 3) |
+
__gen_field(values->PointRasterizationRule, 2, 2) |
+      __gen_field(values->ForceKillPixelEnable, 0, 1) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_WM_CHROMAKEY_length_bias 0x00000002
+#define GEN8_3DSTATE_WM_CHROMAKEY_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 76, \
+   .DwordLength = 0
+
+#define GEN8_3DSTATE_WM_CHROMAKEY_length 0x00000002
+/* Unpacked field values read by GEN8_3DSTATE_WM_CHROMAKEY_pack(). */
+struct GEN8_3DSTATE_WM_CHROMAKEY {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   bool ChromaKeyKillEnable;
+};
+/* Serializes *values into two dwords at dst: header in dw[0],
+ * ChromaKeyKillEnable with the (31, 31) range in dw[1].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_WM_CHROMAKEY_pack(__gen_user_data *data, void * restrict dst,
+                               const struct GEN8_3DSTATE_WM_CHROMAKEY * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->ChromaKeyKillEnable, 31, 31) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length_bias 0x00000002
+#define GEN8_3DSTATE_WM_DEPTH_STENCIL_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 78, \
+   .DwordLength = 1
+
+#define GEN8_3DSTATE_WM_DEPTH_STENCIL_length 0x00000003
+/* Unpacked field values read by GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(). */
+struct GEN8_3DSTATE_WM_DEPTH_STENCIL {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t StencilFailOp;
+   uint32_t StencilPassDepthFailOp;
+   uint32_t StencilPassDepthPassOp;
+   uint32_t BackfaceStencilTestFunction;
+   uint32_t BackfaceStencilFailOp;
+   uint32_t BackfaceStencilPassDepthFailOp;
+   uint32_t BackfaceStencilPassDepthPassOp;
+   uint32_t StencilTestFunction;
+   uint32_t DepthTestFunction;
+   bool DoubleSidedStencilEnable;
+   bool StencilTestEnable;
+   bool
StencilBufferWriteEnable;
+   bool DepthTestEnable;
+   bool DepthBufferWriteEnable;
+   uint32_t StencilTestMask;
+   uint32_t StencilWriteMask;
+   uint32_t BackfaceStencilTestMask;
+   uint32_t BackfaceStencilWriteMask;
+};
+/* Serializes *values into three dwords at dst: header in dw[0], the
+ * op/function selectors and enable flags in dw[1], and the four mask
+ * members (each given an 8-bit-wide range) in dw[2].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(__gen_user_data *data, void * restrict dst,
+                                   const struct GEN8_3DSTATE_WM_DEPTH_STENCIL * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->StencilFailOp, 29, 31) |
+      __gen_field(values->StencilPassDepthFailOp, 26, 28) |
+      __gen_field(values->StencilPassDepthPassOp, 23, 25) |
+      __gen_field(values->BackfaceStencilTestFunction, 20, 22) |
+      __gen_field(values->BackfaceStencilFailOp, 17, 19) |
+      __gen_field(values->BackfaceStencilPassDepthFailOp, 14, 16) |
+      __gen_field(values->BackfaceStencilPassDepthPassOp, 11, 13) |
+      __gen_field(values->StencilTestFunction, 8, 10) |
+      __gen_field(values->DepthTestFunction, 5, 7) |
+      __gen_field(values->DoubleSidedStencilEnable, 4, 4) |
+      __gen_field(values->StencilTestEnable, 3, 3) |
+      __gen_field(values->StencilBufferWriteEnable, 2, 2) |
+      __gen_field(values->DepthTestEnable, 1, 1) |
+      __gen_field(values->DepthBufferWriteEnable, 0, 0) |
+      0;
+
+   dw[2] =
+      __gen_field(values->StencilTestMask, 24, 31) |
+      __gen_field(values->StencilWriteMask, 16, 23) |
+      __gen_field(values->BackfaceStencilTestMask, 8, 15) |
+      __gen_field(values->BackfaceStencilWriteMask, 0, 7) |
+      0;
+
+}
+
+#define GEN8_3DSTATE_WM_HZ_OP_length_bias 0x00000002
+#define GEN8_3DSTATE_WM_HZ_OP_header \
+   .CommandType = 3, \
+   .CommandSubType = 3, \
+   ._3DCommandOpcode = 0, \
+   ._3DCommandSubOpcode = 82, \
+   .DwordLength = 3
+
+#define GEN8_3DSTATE_WM_HZ_OP_length 0x00000005
+/* Unpacked field values read by GEN8_3DSTATE_WM_HZ_OP_pack(). */
+struct
GEN8_3DSTATE_WM_HZ_OP {
+   uint32_t CommandType;
+   uint32_t CommandSubType;
+   uint32_t _3DCommandOpcode;
+   uint32_t _3DCommandSubOpcode;
+   uint32_t DwordLength;
+   bool StencilBufferClearEnable;
+   bool DepthBufferClearEnable;
+   bool ScissorRectangleEnable;
+   bool DepthBufferResolveEnable;
+   bool HierarchicalDepthBufferResolveEnable;
+   uint32_t PixelPositionOffsetEnable;
+   bool FullSurfaceDepthClear;
+   uint32_t StencilClearValue;
+   uint32_t NumberofMultisamples;
+   uint32_t ClearRectangleYMin;
+   uint32_t ClearRectangleXMin;
+   uint32_t ClearRectangleYMax;
+   uint32_t ClearRectangleXMax;
+   uint32_t SampleMask;
+};
+/* Serializes *values into five dwords at dst: header in dw[0], operation
+ * flags plus StencilClearValue/NumberofMultisamples in dw[1], the clear
+ * rectangle minimum corner in dw[2] and maximum corner in dw[3] (Y member in
+ * the upper range, X in the lower), and SampleMask in dw[4].
+ * `data` is not referenced. */
+static inline void
+GEN8_3DSTATE_WM_HZ_OP_pack(__gen_user_data *data, void * restrict dst,
+                           const struct GEN8_3DSTATE_WM_HZ_OP * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->CommandSubType, 27, 28) |
+      __gen_field(values->_3DCommandOpcode, 24, 26) |
+      __gen_field(values->_3DCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->StencilBufferClearEnable, 31, 31) |
+      __gen_field(values->DepthBufferClearEnable, 30, 30) |
+      __gen_field(values->ScissorRectangleEnable, 29, 29) |
+      __gen_field(values->DepthBufferResolveEnable, 28, 28) |
+      __gen_field(values->HierarchicalDepthBufferResolveEnable, 27, 27) |
+      __gen_field(values->PixelPositionOffsetEnable, 26, 26) |
+      __gen_field(values->FullSurfaceDepthClear, 25, 25) |
+      __gen_field(values->StencilClearValue, 16, 23) |
+      __gen_field(values->NumberofMultisamples, 13, 15) |
+      0;
+
+   dw[2] =
+      __gen_field(values->ClearRectangleYMin, 16, 31) |
+      __gen_field(values->ClearRectangleXMin, 0, 15) |
+      0;
+
+   dw[3] =
+      __gen_field(values->ClearRectangleYMax, 16, 31) |
+      __gen_field(values->ClearRectangleXMax, 0, 15) |
+      0;
+
+   dw[4] =
+      __gen_field(values->SampleMask, 0, 15) |
+      0;
+
+}
+
+#define GEN8_GPGPU_WALKER_length_bias 0x00000002
+#define GEN8_GPGPU_WALKER_header \
+
.CommandType = 3, \
+   .Pipeline = 2, \
+   .MediaCommandOpcode = 1, \
+   .SubOpcode = 5, \
+   .DwordLength = 13
+
+#define GEN8_GPGPU_WALKER_length 0x0000000f
+/* Unpacked field values read by GEN8_GPGPU_WALKER_pack(). The header members
+ * here are named Pipeline/MediaCommandOpcode/SubOpcode rather than the
+ * CommandSubType/_3DCommand* names used by the 3DSTATE structs. */
+struct GEN8_GPGPU_WALKER {
+   uint32_t CommandType;
+   uint32_t Pipeline;
+   uint32_t MediaCommandOpcode;
+   uint32_t SubOpcode;
+   bool IndirectParameterEnable;
+   bool PredicateEnable;
+   uint32_t DwordLength;
+   uint32_t InterfaceDescriptorOffset;
+   uint32_t IndirectDataLength;
+   uint32_t IndirectDataStartAddress;
+#define SIMD8 0
+#define SIMD16 1
+#define SIMD32 2
+   uint32_t SIMDSize;
+   uint32_t ThreadDepthCounterMaximum;
+   uint32_t ThreadHeightCounterMaximum;
+   uint32_t ThreadWidthCounterMaximum;
+   uint32_t ThreadGroupIDStartingX;
+   uint32_t ThreadGroupIDXDimension;
+   uint32_t ThreadGroupIDStartingY;
+   uint32_t ThreadGroupIDYDimension;
+   uint32_t ThreadGroupIDStartingResumeZ;
+   uint32_t ThreadGroupIDZDimension;
+   uint32_t RightExecutionMask;
+   uint32_t BottomExecutionMask;
+};
+/* Serializes *values into fifteen dwords at dst (dw[0]..dw[14]). dw[6] and
+ * dw[9] are written as constant 0; every other dword carries at least one
+ * struct member. `data` is not referenced. */
+static inline void
+GEN8_GPGPU_WALKER_pack(__gen_user_data *data, void * restrict dst,
+                       const struct GEN8_GPGPU_WALKER * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->Pipeline, 27, 28) |
+      __gen_field(values->MediaCommandOpcode, 24, 26) |
+      __gen_field(values->SubOpcode, 16, 23) |
+      __gen_field(values->IndirectParameterEnable, 10, 10) |
+      __gen_field(values->PredicateEnable, 8, 8) |
+      __gen_field(values->DwordLength, 0, 7) |
+      0;
+
+   dw[1] =
+      __gen_field(values->InterfaceDescriptorOffset, 0, 5) |
+      0;
+
+   dw[2] =
+      __gen_field(values->IndirectDataLength, 0, 16) |
+      0;
+
+   dw[3] =
+      __gen_offset(values->IndirectDataStartAddress, 6, 31) |
+      0;
+
+   dw[4] =
+      __gen_field(values->SIMDSize, 30, 31) |
+      __gen_field(values->ThreadDepthCounterMaximum, 16, 21) |
+      __gen_field(values->ThreadHeightCounterMaximum, 8, 13) |
+      __gen_field(values->ThreadWidthCounterMaximum, 0, 5) |
+      0;
+
+   dw[5] =
+      __gen_field(values->ThreadGroupIDStartingX, 0, 31) |
+
0;
+
+   dw[6] =
+      0;
+
+   dw[7] =
+      __gen_field(values->ThreadGroupIDXDimension, 0, 31) |
+      0;
+
+   dw[8] =
+      __gen_field(values->ThreadGroupIDStartingY, 0, 31) |
+      0;
+
+   dw[9] =
+      0;
+
+   dw[10] =
+      __gen_field(values->ThreadGroupIDYDimension, 0, 31) |
+      0;
+
+   dw[11] =
+      __gen_field(values->ThreadGroupIDStartingResumeZ, 0, 31) |
+      0;
+
+   dw[12] =
+      __gen_field(values->ThreadGroupIDZDimension, 0, 31) |
+      0;
+
+   dw[13] =
+      __gen_field(values->RightExecutionMask, 0, 31) |
+      0;
+
+   dw[14] =
+      __gen_field(values->BottomExecutionMask, 0, 31) |
+      0;
+
+}
+
+#define GEN8_MEDIA_CURBE_LOAD_length_bias 0x00000002
+#define GEN8_MEDIA_CURBE_LOAD_header \
+   .CommandType = 3, \
+   .Pipeline = 2, \
+   .MediaCommandOpcode = 0, \
+   .SubOpcode = 1, \
+   .DwordLength = 2
+
+#define GEN8_MEDIA_CURBE_LOAD_length 0x00000004
+/* Unpacked field values read by GEN8_MEDIA_CURBE_LOAD_pack(). */
+struct GEN8_MEDIA_CURBE_LOAD {
+   uint32_t CommandType;
+   uint32_t Pipeline;
+   uint32_t MediaCommandOpcode;
+   uint32_t SubOpcode;
+   uint32_t DwordLength;
+   uint32_t CURBETotalDataLength;
+   uint32_t CURBEDataStartAddress;
+};
+/* Serializes *values into four dwords at dst. DwordLength uses the wider
+ * (0, 15) range here, dw[1] is written as constant 0, and the two CURBE
+ * members go to dw[2] and dw[3]. `data` is not referenced. */
+static inline void
+GEN8_MEDIA_CURBE_LOAD_pack(__gen_user_data *data, void * restrict dst,
+                           const struct GEN8_MEDIA_CURBE_LOAD * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->Pipeline, 27, 28) |
+      __gen_field(values->MediaCommandOpcode, 24, 26) |
+      __gen_field(values->SubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 15) |
+      0;
+
+   dw[1] =
+      0;
+
+   dw[2] =
+      __gen_field(values->CURBETotalDataLength, 0, 16) |
+      0;
+
+   dw[3] =
+      __gen_field(values->CURBEDataStartAddress, 0, 31) |
+      0;
+
+}
+
+#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length_bias 0x00000002
+#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_header\
+   .CommandType = 3, \
+   .Pipeline = 2, \
+   .MediaCommandOpcode = 0, \
+   .SubOpcode = 2, \
+   .DwordLength = 2
+
+#define GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_length 0x00000004
+/* Unpacked field values read by GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(). */
+struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD {
+   uint32_t
CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorTotalLength; + uint32_t InterfaceDescriptorDataStartAddress; +}; + +static inline void +GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + 0; + + dw[2] = + __gen_field(values->InterfaceDescriptorTotalLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->InterfaceDescriptorDataStartAddress, 0, 31) | + 0; + +} + +#define GEN8_MEDIA_OBJECT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_header \ + .CommandType = 3, \ + .MediaCommandPipeline = 2, \ + .MediaCommandOpcode = 1, \ + .MediaCommandSubOpcode = 0 + +#define GEN8_MEDIA_OBJECT_length 0x00000000 + +struct GEN8_MEDIA_OBJECT { + uint32_t CommandType; + uint32_t MediaCommandPipeline; + uint32_t MediaCommandOpcode; + uint32_t MediaCommandSubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; + uint32_t ForceDestination; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; +#define Slice0 0 +#define Slice1 1 +#define Slice2 2 + uint32_t SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoredboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + /* variable length fields follow 
*/
+};
+/* Serializes the fixed part of *values into six dwords at dst
+ * (dw[0]..dw[5]); nothing beyond dw[5] is written here. The result of
+ * __gen_combine_address() is stored in the single dword dw[3] (no high half
+ * is written, unlike the 64-bit address fields elsewhere in this file).
+ * ScoreboardMask is inserted with the 8-bit-wide (0, 7) range.
+ * NOTE(review): the member is spelled ScoredboardY in this struct. */
+static inline void
+GEN8_MEDIA_OBJECT_pack(__gen_user_data *data, void * restrict dst,
+                       const struct GEN8_MEDIA_OBJECT * restrict values)
+{
+   uint32_t *dw = (uint32_t * restrict) dst;
+
+   dw[0] =
+      __gen_field(values->CommandType, 29, 31) |
+      __gen_field(values->MediaCommandPipeline, 27, 28) |
+      __gen_field(values->MediaCommandOpcode, 24, 26) |
+      __gen_field(values->MediaCommandSubOpcode, 16, 23) |
+      __gen_field(values->DwordLength, 0, 15) |
+      0;
+
+   dw[1] =
+      __gen_field(values->InterfaceDescriptorOffset, 0, 5) |
+      0;
+
+   dw[2] =
+      __gen_field(values->ChildrenPresent, 31, 31) |
+      __gen_field(values->ThreadSynchronization, 24, 24) |
+      __gen_field(values->ForceDestination, 22, 22) |
+      __gen_field(values->UseScoreboard, 21, 21) |
+      __gen_field(values->SliceDestinationSelect, 19, 20) |
+      __gen_field(values->SubSliceDestinationSelect, 17, 18) |
+      __gen_field(values->IndirectDataLength, 0, 16) |
+      0;
+
+   uint32_t dw3 =
+      0;
+
+   dw[3] =
+      __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3);
+
+   dw[4] =
+      __gen_field(values->ScoredboardY, 16, 24) |
+      __gen_field(values->ScoreboardX, 0, 8) |
+      0;
+
+   dw[5] =
+      __gen_field(values->ScoreboardColor, 16, 19) |
+      __gen_field(values->ScoreboardMask, 0, 7) |
+      0;
+
+   /* variable length fields follow */
+}
+
+#define GEN8_MEDIA_OBJECT_GRPID_length_bias 0x00000002
+#define GEN8_MEDIA_OBJECT_GRPID_header \
+   .CommandType = 3, \
+   .MediaCommandPipeline = 2, \
+   .MediaCommandOpcode = 1, \
+   .MediaCommandSubOpcode = 6
+
+#define GEN8_MEDIA_OBJECT_GRPID_length 0x00000000
+/* Unpacked field values read by GEN8_MEDIA_OBJECT_GRPID_pack(). */
+struct GEN8_MEDIA_OBJECT_GRPID {
+   uint32_t CommandType;
+   uint32_t MediaCommandPipeline;
+   uint32_t MediaCommandOpcode;
+   uint32_t MediaCommandSubOpcode;
+   uint32_t DwordLength;
+   uint32_t InterfaceDescriptorOffset;
+   uint32_t EndofThreadGroup;
+   uint32_t ForceDestination;
+#define Notusingscoreboard 0
+#define Usingscoreboard 1
+   uint32_t UseScoreboard;
+#define Slice0 0
+#define Slice1 1
+#define Slice2 2
+   uint32_t
SliceDestinationSelect; +#define SubSlice2 2 +#define SubSlice1 1 +#define SubSlice0 0 + uint32_t SubSliceDestinationSelect; + uint32_t IndirectDataLength; + __gen_address_type IndirectDataStartAddress; + uint32_t ScoreboardY; + uint32_t ScoreboardX; + uint32_t ScoreboardColor; + bool ScoreboardMask; + uint32_t GroupID; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_GRPID_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_GRPID * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MediaCommandPipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->MediaCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->EndofThreadGroup, 23, 23) | + __gen_field(values->ForceDestination, 22, 22) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->SliceDestinationSelect, 19, 20) | + __gen_field(values->SubSliceDestinationSelect, 17, 18) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + uint32_t dw3 = + 0; + + dw[3] = + __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, dw3); + + dw[4] = + __gen_field(values->ScoreboardY, 16, 24) | + __gen_field(values->ScoreboardX, 0, 8) | + 0; + + dw[5] = + __gen_field(values->ScoreboardColor, 16, 19) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->GroupID, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_OBJECT_PRT_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_PRT_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 2, \ + .DwordLength = 14 + +#define GEN8_MEDIA_OBJECT_PRT_length 0x00000010 + +struct GEN8_MEDIA_OBJECT_PRT { + uint32_t CommandType; + uint32_t 
Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; + bool PRT_FenceNeeded; +#define Rootthreadqueue 0 +#define VFEstateflush 1 + uint32_t PRT_FenceType; + uint32_t InlineData[12]; +}; + +static inline void +GEN8_MEDIA_OBJECT_PRT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_PRT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->PRT_FenceNeeded, 23, 23) | + __gen_field(values->PRT_FenceType, 22, 22) | + 0; + + dw[3] = + 0; + + for (uint32_t i = 0, j = 4; i < 12; i += 1, j++) { + dw[j] = + __gen_field(values->InlineData[i + 0], 0, 31) | + 0; + } + +} + +#define GEN8_MEDIA_OBJECT_WALKER_length_bias 0x00000002 +#define GEN8_MEDIA_OBJECT_WALKER_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 1, \ + .SubOpcode = 3 + +#define GEN8_MEDIA_OBJECT_WALKER_length 0x00000000 + +struct GEN8_MEDIA_OBJECT_WALKER { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t InterfaceDescriptorOffset; + bool ChildrenPresent; +#define Nothreadsynchronization 0 +#define Threaddispatchissynchronizedbythespawnrootthreadmessage 1 + uint32_t ThreadSynchronization; +#define Notusingscoreboard 0 +#define Usingscoreboard 1 + uint32_t UseScoreboard; + uint32_t IndirectDataLength; + uint32_t IndirectDataStartAddress; + uint32_t GroupIDLoopSelect; + bool ScoreboardMask; + uint32_t ColorCountMinusOne; + uint32_t MiddleLoopExtraSteps; + uint32_t 
LocalMidLoopUnitY; + uint32_t MidLoopUnitX; + uint32_t GlobalLoopExecCount; + uint32_t LocalLoopExecCount; + uint32_t BlockResolutionY; + uint32_t BlockResolutionX; + uint32_t LocalStartY; + uint32_t LocalStartX; + uint32_t LocalOuterLoopStrideY; + uint32_t LocalOuterLoopStrideX; + uint32_t LocalInnerLoopUnitY; + uint32_t LocalInnerLoopUnitX; + uint32_t GlobalResolutionY; + uint32_t GlobalResolutionX; + uint32_t GlobalStartY; + uint32_t GlobalStartX; + uint32_t GlobalOuterLoopStrideY; + uint32_t GlobalOuterLoopStrideX; + uint32_t GlobalInnerLoopUnitY; + uint32_t GlobalInnerLoopUnitX; + /* variable length fields follow */ +}; + +static inline void +GEN8_MEDIA_OBJECT_WALKER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_OBJECT_WALKER * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + + dw[2] = + __gen_field(values->ChildrenPresent, 31, 31) | + __gen_field(values->ThreadSynchronization, 24, 24) | + __gen_field(values->UseScoreboard, 21, 21) | + __gen_field(values->IndirectDataLength, 0, 16) | + 0; + + dw[3] = + __gen_offset(values->IndirectDataStartAddress, 0, 31) | + 0; + + dw[4] = + 0; + + dw[5] = + __gen_field(values->GroupIDLoopSelect, 8, 31) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[6] = + __gen_field(values->ColorCountMinusOne, 24, 27) | + __gen_field(values->MiddleLoopExtraSteps, 16, 20) | + __gen_field(values->LocalMidLoopUnitY, 12, 13) | + __gen_field(values->MidLoopUnitX, 8, 9) | + 0; + + dw[7] = + __gen_field(values->GlobalLoopExecCount, 16, 25) | + __gen_field(values->LocalLoopExecCount, 0, 9) | + 0; + + dw[8] = + __gen_field(values->BlockResolutionY, 16, 24) | + 
__gen_field(values->BlockResolutionX, 0, 8) | + 0; + + dw[9] = + __gen_field(values->LocalStartY, 16, 24) | + __gen_field(values->LocalStartX, 0, 8) | + 0; + + dw[10] = + 0; + + dw[11] = + __gen_field(values->LocalOuterLoopStrideY, 16, 25) | + __gen_field(values->LocalOuterLoopStrideX, 0, 9) | + 0; + + dw[12] = + __gen_field(values->LocalInnerLoopUnitY, 16, 25) | + __gen_field(values->LocalInnerLoopUnitX, 0, 9) | + 0; + + dw[13] = + __gen_field(values->GlobalResolutionY, 16, 24) | + __gen_field(values->GlobalResolutionX, 0, 8) | + 0; + + dw[14] = + __gen_field(values->GlobalStartY, 16, 25) | + __gen_field(values->GlobalStartX, 0, 9) | + 0; + + dw[15] = + __gen_field(values->GlobalOuterLoopStrideY, 16, 25) | + __gen_field(values->GlobalOuterLoopStrideX, 0, 9) | + 0; + + dw[16] = + __gen_field(values->GlobalInnerLoopUnitY, 16, 25) | + __gen_field(values->GlobalInnerLoopUnitX, 0, 9) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MEDIA_STATE_FLUSH_length_bias 0x00000002 +#define GEN8_MEDIA_STATE_FLUSH_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 4, \ + .DwordLength = 0 + +#define GEN8_MEDIA_STATE_FLUSH_length 0x00000002 + +struct GEN8_MEDIA_STATE_FLUSH { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + bool FlushtoGO; + uint32_t WatermarkRequired; + uint32_t InterfaceDescriptorOffset; +}; + +static inline void +GEN8_MEDIA_STATE_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_STATE_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_field(values->FlushtoGO, 7, 7) | + __gen_field(values->WatermarkRequired, 6, 6) | + 
__gen_field(values->InterfaceDescriptorOffset, 0, 5) | + 0; + +} + +#define GEN8_MEDIA_VFE_STATE_length_bias 0x00000002 +#define GEN8_MEDIA_VFE_STATE_header \ + .CommandType = 3, \ + .Pipeline = 2, \ + .MediaCommandOpcode = 0, \ + .SubOpcode = 0, \ + .DwordLength = 7 + +#define GEN8_MEDIA_VFE_STATE_length 0x00000009 + +struct GEN8_MEDIA_VFE_STATE { + uint32_t CommandType; + uint32_t Pipeline; + uint32_t MediaCommandOpcode; + uint32_t SubOpcode; + uint32_t DwordLength; + uint32_t ScratchSpaceBasePointer; + uint32_t StackSize; + uint32_t PerThreadScratchSpace; + uint32_t ScratchSpaceBasePointerHigh; + uint32_t MaximumNumberofThreads; + uint32_t NumberofURBEntries; +#define Maintainingtheexistingtimestampstate 0 +#define Resettingrelativetimerandlatchingtheglobaltimestamp 1 + uint32_t ResetGatewayTimer; +#define MaintainingOpenGatewayForwardMsgCloseGatewayprotocollegacymode 0 +#define BypassingOpenGatewayCloseGatewayprotocol 1 + uint32_t BypassGatewayControl; + uint32_t SliceDisable; + uint32_t URBEntryAllocationSize; + uint32_t CURBEAllocationSize; +#define Scoreboarddisabled 0 +#define Scoreboardenabled 1 + uint32_t ScoreboardEnable; +#define StallingScoreboard 0 +#define NonStallingScoreboard 1 + uint32_t ScoreboardType; + uint32_t ScoreboardMask; + uint32_t Scoreboard3DeltaY; + uint32_t Scoreboard3DeltaX; + uint32_t Scoreboard2DeltaY; + uint32_t Scoreboard2DeltaX; + uint32_t Scoreboard1DeltaY; + uint32_t Scoreboard1DeltaX; + uint32_t Scoreboard0DeltaY; + uint32_t Scoreboard0DeltaX; + uint32_t Scoreboard7DeltaY; + uint32_t Scoreboard7DeltaX; + uint32_t Scoreboard6DeltaY; + uint32_t Scoreboard6DeltaX; + uint32_t Scoreboard5DeltaY; + uint32_t Scoreboard5DeltaX; + uint32_t Scoreboard4DeltaY; + uint32_t Scoreboard4DeltaX; +}; + +static inline void +GEN8_MEDIA_VFE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MEDIA_VFE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 
31) | + __gen_field(values->Pipeline, 27, 28) | + __gen_field(values->MediaCommandOpcode, 24, 26) | + __gen_field(values->SubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 15) | + 0; + + dw[1] = + __gen_offset(values->ScratchSpaceBasePointer, 10, 31) | + __gen_field(values->StackSize, 4, 7) | + __gen_field(values->PerThreadScratchSpace, 0, 3) | + 0; + + dw[2] = + __gen_offset(values->ScratchSpaceBasePointerHigh, 0, 15) | + 0; + + dw[3] = + __gen_field(values->MaximumNumberofThreads, 16, 31) | + __gen_field(values->NumberofURBEntries, 8, 15) | + __gen_field(values->ResetGatewayTimer, 7, 7) | + __gen_field(values->BypassGatewayControl, 6, 6) | + 0; + + dw[4] = + __gen_field(values->SliceDisable, 0, 1) | + 0; + + dw[5] = + __gen_field(values->URBEntryAllocationSize, 16, 31) | + __gen_field(values->CURBEAllocationSize, 0, 15) | + 0; + + dw[6] = + __gen_field(values->ScoreboardEnable, 31, 31) | + __gen_field(values->ScoreboardType, 30, 30) | + __gen_field(values->ScoreboardMask, 0, 7) | + 0; + + dw[7] = + __gen_field(values->Scoreboard3DeltaY, 28, 31) | + __gen_field(values->Scoreboard3DeltaX, 24, 27) | + __gen_field(values->Scoreboard2DeltaY, 20, 23) | + __gen_field(values->Scoreboard2DeltaX, 16, 19) | + __gen_field(values->Scoreboard1DeltaY, 12, 15) | + __gen_field(values->Scoreboard1DeltaX, 8, 11) | + __gen_field(values->Scoreboard0DeltaY, 4, 7) | + __gen_field(values->Scoreboard0DeltaX, 0, 3) | + 0; + + dw[8] = + __gen_field(values->Scoreboard7DeltaY, 28, 31) | + __gen_field(values->Scoreboard7DeltaX, 24, 27) | + __gen_field(values->Scoreboard6DeltaY, 20, 23) | + __gen_field(values->Scoreboard6DeltaX, 16, 19) | + __gen_field(values->Scoreboard5DeltaY, 12, 15) | + __gen_field(values->Scoreboard5DeltaX, 8, 11) | + __gen_field(values->Scoreboard4DeltaY, 4, 7) | + __gen_field(values->Scoreboard4DeltaX, 0, 3) | + 0; + +} + +#define GEN8_MI_ARB_CHECK_length_bias 0x00000001 +#define GEN8_MI_ARB_CHECK_header \ + .CommandType = 0, \ + .MICommandOpcode = 5 + +#define 
GEN8_MI_ARB_CHECK_length 0x00000001 + +struct GEN8_MI_ARB_CHECK { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_ARB_CHECK_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_ARB_CHECK * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_END_length_bias 0x00000001 +#define GEN8_MI_BATCH_BUFFER_END_header \ + .CommandType = 0, \ + .MICommandOpcode = 10 + +#define GEN8_MI_BATCH_BUFFER_END_length 0x00000001 + +struct GEN8_MI_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_BATCH_BUFFER_START_length_bias 0x00000002 +#define GEN8_MI_BATCH_BUFFER_START_header \ + .CommandType = 0, \ + .MICommandOpcode = 49, \ + .DwordLength = 1 + +#define GEN8_MI_BATCH_BUFFER_START_length 0x00000003 + +struct GEN8_MI_BATCH_BUFFER_START { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define _1stlevelbatch 0 +#define _2ndlevelbatch 1 + uint32_t _2ndLevelBatchBuffer; + bool AddOffsetEnable; + uint32_t PredicationEnable; + bool ResourceStreamerEnable; +#define ASI_GGTT 0 +#define ASI_PPGTT 1 + uint32_t AddressSpaceIndicator; + uint32_t DwordLength; + __gen_address_type BatchBufferStartAddress; +}; + +static inline void +GEN8_MI_BATCH_BUFFER_START_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_BATCH_BUFFER_START * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 
__gen_field(values->_2ndLevelBatchBuffer, 22, 22) | + __gen_field(values->AddOffsetEnable, 16, 16) | + __gen_field(values->PredicationEnable, 15, 15) | + __gen_field(values->ResourceStreamerEnable, 10, 10) | + __gen_field(values->AddressSpaceIndicator, 8, 8) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + +} + +#define GEN8_MI_CLFLUSH_length_bias 0x00000002 +#define GEN8_MI_CLFLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 39 + +#define GEN8_MI_CLFLUSH_length 0x00000000 + +struct GEN8_MI_CLFLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTT; + uint32_t DwordLength; + __gen_address_type PageBaseAddress; + uint32_t StartingCachelineOffset; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_CLFLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CLFLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->StartingCachelineOffset, 6, 11) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->PageBaseAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + /* variable length fields follow */ +} + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length_bias 0x00000002 +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_header\ + .CommandType = 0, \ + .MICommandOpcode = 54, \ + .UseGlobalGTT = 0, \ + .CompareSemaphore = 0, \ + .DwordLength = 1 + +#define GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_length 0x00000003 + +struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END { + uint32_t CommandType; + uint32_t 
MICommandOpcode; + uint32_t UseGlobalGTT; + uint32_t CompareSemaphore; + uint32_t DwordLength; + uint32_t CompareDataDword; + __gen_address_type CompareAddress; +}; + +static inline void +GEN8_MI_CONDITIONAL_BATCH_BUFFER_END_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_CONDITIONAL_BATCH_BUFFER_END * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->CompareSemaphore, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->CompareDataDword, 0, 31) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->CompareAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_COPY_MEM_MEM_length_bias 0x00000002 +#define GEN8_MI_COPY_MEM_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 46, \ + .DwordLength = 3 + +#define GEN8_MI_COPY_MEM_MEM_length 0x00000005 + +struct GEN8_MI_COPY_MEM_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTSource; +#define PerProcessGraphicsAddress 0 +#define GlobalGraphicsAddress 1 + uint32_t UseGlobalGTTDestination; + uint32_t DwordLength; + __gen_address_type DestinationMemoryAddress; + __gen_address_type SourceMemoryAddress; +}; + +static inline void +GEN8_MI_COPY_MEM_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_COPY_MEM_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTTSource, 22, 22) | + __gen_field(values->UseGlobalGTTDestination, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + 0; + + uint64_t qw1 = + 
__gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + uint32_t dw3 = + 0; + + uint64_t qw3 = + __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, dw3); + + dw[3] = qw3; + dw[4] = qw3 >> 32; + +} + +#define GEN8_MI_LOAD_REGISTER_IMM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 34, \ + .DwordLength = 1 + +#define GEN8_MI_LOAD_REGISTER_IMM_length 0x00000003 + +struct GEN8_MI_LOAD_REGISTER_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t ByteWriteDisables; + uint32_t DwordLength; + uint32_t RegisterOffset; + uint32_t DataDWord; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ByteWriteDisables, 8, 11) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterOffset, 2, 22) | + 0; + + dw[2] = + __gen_field(values->DataDWord, 0, 31) | + 0; + +} + +#define GEN8_MI_LOAD_REGISTER_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_REGISTER_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 41, \ + .DwordLength = 2 + +#define GEN8_MI_LOAD_REGISTER_MEM_length 0x00000004 + +struct GEN8_MI_LOAD_REGISTER_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + uint32_t AsyncModeEnable; + uint32_t DwordLength; + uint32_t RegisterAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_REGISTER_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_REGISTER_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 
28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->AsyncModeEnable, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_offset(values->RegisterAddress, 2, 22) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 19, \ + .DwordLength = 0 + +#define GEN8_MI_LOAD_SCAN_LINES_EXCL_length 0x00000002 + +struct GEN8_MI_LOAD_SCAN_LINES_EXCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; + uint32_t DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_EXCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_EXCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length_bias 0x00000002 +#define GEN8_MI_LOAD_SCAN_LINES_INCL_header \ + .CommandType = 0, \ + .MICommandOpcode = 18, \ + .DwordLength = 0 + +#define GEN8_MI_LOAD_SCAN_LINES_INCL_length 0x00000002 + +struct GEN8_MI_LOAD_SCAN_LINES_INCL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define DisplayPlaneA 0 +#define DisplayPlaneB 1 +#define DisplayPlaneC 4 + uint32_t DisplayPlaneSelect; +#define NeverForward 0 +#define AlwaysForward 1 +#define ConditionallyForward 2 + bool ScanLineEventDoneForward; + uint32_t 
DwordLength; + uint32_t StartScanLineNumber; + uint32_t EndScanLineNumber; +}; + +static inline void +GEN8_MI_LOAD_SCAN_LINES_INCL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_SCAN_LINES_INCL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPlaneSelect, 19, 21) | + __gen_field(values->ScanLineEventDoneForward, 17, 18) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->StartScanLineNumber, 16, 28) | + __gen_field(values->EndScanLineNumber, 0, 12) | + 0; + +} + +#define GEN8_MI_LOAD_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_LOAD_URB_MEM_header \ + .CommandType = 0, \ + .MICommandOpcode = 44, \ + .DwordLength = 2 + +#define GEN8_MI_LOAD_URB_MEM_length 0x00000004 + +struct GEN8_MI_LOAD_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_LOAD_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_LOAD_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_MATH_length_bias 0x00000002 +#define GEN8_MI_MATH_header \ + .CommandType = 0, \ + .MICommandOpcode = 26 + +#define GEN8_MI_MATH_length 0x00000000 + +struct GEN8_MI_MATH { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t ALUINSTRUCTION1; + uint32_t ALUINSTRUCTION2; + /* variable length fields follow */ +}; + +static 
inline void +GEN8_MI_MATH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_MATH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 5) | + 0; + + dw[1] = + __gen_field(values->ALUINSTRUCTION1, 0, 31) | + 0; + + dw[2] = + __gen_field(values->ALUINSTRUCTION2, 0, 31) | + 0; + + /* variable length fields follow */ +} + +#define GEN8_MI_NOOP_length_bias 0x00000001 +#define GEN8_MI_NOOP_header \ + .CommandType = 0, \ + .MICommandOpcode = 0 + +#define GEN8_MI_NOOP_length 0x00000001 + +struct GEN8_MI_NOOP { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool IdentificationNumberRegisterWriteEnable; + uint32_t IdentificationNumber; +}; + +static inline void +GEN8_MI_NOOP_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_NOOP * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->IdentificationNumberRegisterWriteEnable, 22, 22) | + __gen_field(values->IdentificationNumber, 0, 21) | + 0; + +} + +#define GEN8_MI_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 12 + +#define GEN8_MI_PREDICATE_length 0x00000001 + +struct GEN8_MI_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define LOAD_KEEP 0 +#define LOAD_LOAD 2 +#define LOAD_LOADINV 3 + uint32_t LoadOperation; +#define COMBINE_SET 0 +#define COMBINE_AND 1 +#define COMBINE_OR 2 +#define COMBINE_XOR 3 + uint32_t CombineOperation; +#define COMPARE_SRCS_EQUAL 2 +#define COMPARE_DELTAS_EQUAL 3 + uint32_t CompareOperation; +}; + +static inline void +GEN8_MI_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) 
dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->LoadOperation, 6, 7) | + __gen_field(values->CombineOperation, 3, 4) | + __gen_field(values->CompareOperation, 0, 1) | + 0; + +} + +#define GEN8_MI_REPORT_HEAD_length_bias 0x00000001 +#define GEN8_MI_REPORT_HEAD_header \ + .CommandType = 0, \ + .MICommandOpcode = 7 + +#define GEN8_MI_REPORT_HEAD_length 0x00000001 + +struct GEN8_MI_REPORT_HEAD { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_REPORT_HEAD_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_REPORT_HEAD * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_RS_CONTEXT_length_bias 0x00000001 +#define GEN8_MI_RS_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 15 + +#define GEN8_MI_RS_CONTEXT_length 0x00000001 + +struct GEN8_MI_RS_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_RESTORE 0 +#define RS_SAVE 1 + uint32_t ResourceStreamerSave; +}; + +static inline void +GEN8_MI_RS_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_CONTEXT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerSave, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_CONTROL_length_bias 0x00000001 +#define GEN8_MI_RS_CONTROL_header \ + .CommandType = 0, \ + .MICommandOpcode = 6 + +#define GEN8_MI_RS_CONTROL_length 0x00000001 + +struct GEN8_MI_RS_CONTROL { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define RS_STOP 0 +#define RS_START 1 + uint32_t ResourceStreamerControl; +}; + +static inline void +GEN8_MI_RS_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct 
GEN8_MI_RS_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->ResourceStreamerControl, 0, 0) | + 0; + +} + +#define GEN8_MI_RS_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_RS_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 43, \ + .DwordLength = 2 + +#define GEN8_MI_RS_STORE_DATA_IMM_length 0x00000004 + +struct GEN8_MI_RS_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type DestinationAddress; + uint32_t CoreModeEnable; + uint32_t DataDWord0; +}; + +static inline void +GEN8_MI_RS_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_RS_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->DestinationAddress, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + +} + +#define GEN8_MI_SET_CONTEXT_length_bias 0x00000002 +#define GEN8_MI_SET_CONTEXT_header \ + .CommandType = 0, \ + .MICommandOpcode = 24, \ + .DwordLength = 0 + +#define GEN8_MI_SET_CONTEXT_length 0x00000002 + +struct GEN8_MI_SET_CONTEXT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type LogicalContextAddress; + uint32_t ReservedMustbe1; + bool CoreModeEnable; + bool ResourceStreamerStateSaveEnable; + bool ResourceStreamerStateRestoreEnable; + uint32_t ForceRestore; + uint32_t RestoreInhibit; +}; + +static inline void +GEN8_MI_SET_CONTEXT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_CONTEXT * 
restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + uint32_t dw1 = + __gen_field(values->ReservedMustbe1, 8, 8) | + __gen_field(values->CoreModeEnable, 4, 4) | + __gen_field(values->ResourceStreamerStateSaveEnable, 3, 3) | + __gen_field(values->ResourceStreamerStateRestoreEnable, 2, 2) | + __gen_field(values->ForceRestore, 1, 1) | + __gen_field(values->RestoreInhibit, 0, 0) | + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->LogicalContextAddress, dw1); + +} + +#define GEN8_MI_SET_PREDICATE_length_bias 0x00000001 +#define GEN8_MI_SET_PREDICATE_header \ + .CommandType = 0, \ + .MICommandOpcode = 1 + +#define GEN8_MI_SET_PREDICATE_length 0x00000001 + +struct GEN8_MI_SET_PREDICATE { + uint32_t CommandType; + uint32_t MICommandOpcode; +#define NOOPNever 0 +#define NOOPonResult2clear 1 +#define NOOPonResult2set 2 +#define NOOPonResultclear 3 +#define NOOPonResultset 4 +#define Executewhenonesliceenabled 5 +#define Executewhentwoslicesareenabled 6 +#define Executewhenthreeslicesareenabled 7 +#define NOOPAlways 15 + uint32_t PREDICATEENABLE; +}; + +static inline void +GEN8_MI_SET_PREDICATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SET_PREDICATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->PREDICATEENABLE, 0, 3) | + 0; + +} + +#define GEN8_MI_STORE_DATA_IMM_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_IMM_header \ + .CommandType = 0, \ + .MICommandOpcode = 32, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_DATA_IMM_length 0x00000004 + +struct GEN8_MI_STORE_DATA_IMM { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool UseGlobalGTT; + bool StoreQword; + uint32_t DwordLength; + __gen_address_type Address; + 
uint32_t CoreModeEnable; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_IMM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_IMM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UseGlobalGTT, 22, 22) | + __gen_field(values->StoreQword, 21, 21) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + __gen_field(values->CoreModeEnable, 0, 0) | + 0; + + uint64_t qw1 = + __gen_combine_address(data, &dw[1], values->Address, dw1); + + dw[1] = qw1; + dw[2] = qw1 >> 32; + + dw[3] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[4] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_DATA_INDEX_length_bias 0x00000002 +#define GEN8_MI_STORE_DATA_INDEX_header \ + .CommandType = 0, \ + .MICommandOpcode = 33, \ + .DwordLength = 1 + +#define GEN8_MI_STORE_DATA_INDEX_length 0x00000003 + +struct GEN8_MI_STORE_DATA_INDEX { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t UsePerProcessHardwareStatusPage; + uint32_t DwordLength; + uint32_t Offset; + uint32_t DataDWord0; + uint32_t DataDWord1; +}; + +static inline void +GEN8_MI_STORE_DATA_INDEX_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_DATA_INDEX * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->UsePerProcessHardwareStatusPage, 21, 21) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->Offset, 2, 11) | + 0; + + dw[2] = + __gen_field(values->DataDWord0, 0, 31) | + 0; + + dw[3] = + __gen_field(values->DataDWord1, 0, 31) | + 0; + +} + +#define GEN8_MI_STORE_URB_MEM_length_bias 0x00000002 +#define GEN8_MI_STORE_URB_MEM_header \ + .CommandType = 0, 
\ + .MICommandOpcode = 45, \ + .DwordLength = 2 + +#define GEN8_MI_STORE_URB_MEM_length 0x00000004 + +struct GEN8_MI_STORE_URB_MEM { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBAddress; + __gen_address_type MemoryAddress; +}; + +static inline void +GEN8_MI_STORE_URB_MEM_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_STORE_URB_MEM * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBAddress, 2, 14) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->MemoryAddress, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + +} + +#define GEN8_MI_SUSPEND_FLUSH_length_bias 0x00000001 +#define GEN8_MI_SUSPEND_FLUSH_header \ + .CommandType = 0, \ + .MICommandOpcode = 11 + +#define GEN8_MI_SUSPEND_FLUSH_length 0x00000001 + +struct GEN8_MI_SUSPEND_FLUSH { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool SuspendFlush; +}; + +static inline void +GEN8_MI_SUSPEND_FLUSH_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_SUSPEND_FLUSH * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->SuspendFlush, 0, 0) | + 0; + +} + +#define GEN8_MI_TOPOLOGY_FILTER_length_bias 0x00000001 +#define GEN8_MI_TOPOLOGY_FILTER_header \ + .CommandType = 0, \ + .MICommandOpcode = 13 + +#define GEN8_MI_TOPOLOGY_FILTER_length 0x00000001 + +struct GEN8_MI_TOPOLOGY_FILTER { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t TopologyFilterValue; +}; + +static inline void +GEN8_MI_TOPOLOGY_FILTER_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_TOPOLOGY_FILTER * restrict values) +{ + uint32_t *dw = 
(uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->TopologyFilterValue, 0, 5) | + 0; + +} + +#define GEN8_MI_UPDATE_GTT_length_bias 0x00000002 +#define GEN8_MI_UPDATE_GTT_header \ + .CommandType = 0, \ + .MICommandOpcode = 35 + +#define GEN8_MI_UPDATE_GTT_length 0x00000000 + +struct GEN8_MI_UPDATE_GTT { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + __gen_address_type EntryAddress; + /* variable length fields follow */ +}; + +static inline void +GEN8_MI_UPDATE_GTT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_UPDATE_GTT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 9) | + 0; + + uint32_t dw1 = + 0; + + dw[1] = + __gen_combine_address(data, &dw[1], values->EntryAddress, dw1); + + /* variable length fields follow */ +} + +#define GEN8_MI_URB_ATOMIC_ALLOC_length_bias 0x00000001 +#define GEN8_MI_URB_ATOMIC_ALLOC_header \ + .CommandType = 0, \ + .MICommandOpcode = 9 + +#define GEN8_MI_URB_ATOMIC_ALLOC_length 0x00000001 + +struct GEN8_MI_URB_ATOMIC_ALLOC { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t URBAtomicStorageOffset; + uint32_t URBAtomicStorageSize; +}; + +static inline void +GEN8_MI_URB_ATOMIC_ALLOC_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_ATOMIC_ALLOC * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->URBAtomicStorageOffset, 12, 19) | + __gen_field(values->URBAtomicStorageSize, 0, 8) | + 0; + +} + +#define GEN8_MI_URB_CLEAR_length_bias 0x00000002 +#define GEN8_MI_URB_CLEAR_header \ + .CommandType = 0, \ + .MICommandOpcode = 25, \ + .DwordLength = 0 + 
+#define GEN8_MI_URB_CLEAR_length 0x00000002 + +struct GEN8_MI_URB_CLEAR { + uint32_t CommandType; + uint32_t MICommandOpcode; + uint32_t DwordLength; + uint32_t URBClearLength; + uint32_t URBAddress; +}; + +static inline void +GEN8_MI_URB_CLEAR_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_URB_CLEAR * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->URBClearLength, 16, 29) | + __gen_offset(values->URBAddress, 0, 14) | + 0; + +} + +#define GEN8_MI_USER_INTERRUPT_length_bias 0x00000001 +#define GEN8_MI_USER_INTERRUPT_header \ + .CommandType = 0, \ + .MICommandOpcode = 2 + +#define GEN8_MI_USER_INTERRUPT_length 0x00000001 + +struct GEN8_MI_USER_INTERRUPT { + uint32_t CommandType; + uint32_t MICommandOpcode; +}; + +static inline void +GEN8_MI_USER_INTERRUPT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_USER_INTERRUPT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + 0; + +} + +#define GEN8_MI_WAIT_FOR_EVENT_length_bias 0x00000001 +#define GEN8_MI_WAIT_FOR_EVENT_header \ + .CommandType = 0, \ + .MICommandOpcode = 3 + +#define GEN8_MI_WAIT_FOR_EVENT_length 0x00000001 + +struct GEN8_MI_WAIT_FOR_EVENT { + uint32_t CommandType; + uint32_t MICommandOpcode; + bool DisplayPipeCVerticalBlankWaitEnable; + bool DisplaySpriteCFlipPendingWaitEnable; + bool DisplayPlaneCFlipPendingWaitEnable; + bool DisplayPipeCScanLineWaitEnable; + bool DisplayPipeBVerticalBlankWaitEnable; + bool DisplaySpriteBFlipPendingWaitEnable; + bool DisplayPlaneBFlipPendingWaitEnable; + bool DisplayPipeBScanLineWaitEnable; + bool DisplayPipeAVerticalBlankWaitEnable; + bool DisplaySpriteAFlipPendingWaitEnable; + bool 
DisplayPlaneAFlipPendingWaitEnable; + bool DisplayPipeAScanLineWaitEnable; +}; + +static inline void +GEN8_MI_WAIT_FOR_EVENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_MI_WAIT_FOR_EVENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->MICommandOpcode, 23, 28) | + __gen_field(values->DisplayPipeCVerticalBlankWaitEnable, 21, 21) | + __gen_field(values->DisplaySpriteCFlipPendingWaitEnable, 20, 20) | + __gen_field(values->DisplayPlaneCFlipPendingWaitEnable, 15, 15) | + __gen_field(values->DisplayPipeCScanLineWaitEnable, 14, 14) | + __gen_field(values->DisplayPipeBVerticalBlankWaitEnable, 11, 11) | + __gen_field(values->DisplaySpriteBFlipPendingWaitEnable, 10, 10) | + __gen_field(values->DisplayPlaneBFlipPendingWaitEnable, 9, 9) | + __gen_field(values->DisplayPipeBScanLineWaitEnable, 8, 8) | + __gen_field(values->DisplayPipeAVerticalBlankWaitEnable, 3, 3) | + __gen_field(values->DisplaySpriteAFlipPendingWaitEnable, 2, 2) | + __gen_field(values->DisplayPlaneAFlipPendingWaitEnable, 1, 1) | + __gen_field(values->DisplayPipeAScanLineWaitEnable, 0, 0) | + 0; + +} + +#define GEN8_PIPE_CONTROL_length_bias 0x00000002 +#define GEN8_PIPE_CONTROL_header \ + .CommandType = 3, \ + .CommandSubType = 3, \ + ._3DCommandOpcode = 2, \ + ._3DCommandSubOpcode = 0, \ + .DwordLength = 4 + +#define GEN8_PIPE_CONTROL_length 0x00000006 + +struct GEN8_PIPE_CONTROL { + uint32_t CommandType; + uint32_t CommandSubType; + uint32_t _3DCommandOpcode; + uint32_t _3DCommandSubOpcode; + uint32_t DwordLength; +#define DAT_PPGTT 0 +#define DAT_GGTT 1 + uint32_t DestinationAddressType; +#define NoLRIOperation 0 +#define MMIOWriteImmediateData 1 + uint32_t LRIPostSyncOperation; + uint32_t StoreDataIndex; + uint32_t CommandStreamerStallEnable; +#define DontReset 0 +#define Reset 1 + uint32_t GlobalSnapshotCountReset; + uint32_t TLBInvalidate; + bool GenericMediaStateClear; +#define NoWrite 
0 +#define WriteImmediateData 1 +#define WritePSDepthCount 2 +#define WriteTimestamp 3 + uint32_t PostSyncOperation; + bool DepthStallEnable; +#define DisableFlush 0 +#define EnableFlush 1 + bool RenderTargetCacheFlushEnable; + bool InstructionCacheInvalidateEnable; + bool TextureCacheInvalidationEnable; + bool IndirectStatePointersDisable; + bool NotifyEnable; + bool PipeControlFlushEnable; + bool DCFlushEnable; + bool VFCacheInvalidationEnable; + bool ConstantCacheInvalidationEnable; + bool StateCacheInvalidationEnable; + bool StallAtPixelScoreboard; +#define FlushDisabled 0 +#define FlushEnabled 1 + bool DepthCacheFlushEnable; + __gen_address_type Address; + uint64_t ImmediateData; +}; + +static inline void +GEN8_PIPE_CONTROL_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_PIPE_CONTROL * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->CommandType, 29, 31) | + __gen_field(values->CommandSubType, 27, 28) | + __gen_field(values->_3DCommandOpcode, 24, 26) | + __gen_field(values->_3DCommandSubOpcode, 16, 23) | + __gen_field(values->DwordLength, 0, 7) | + 0; + + dw[1] = + __gen_field(values->DestinationAddressType, 24, 24) | + __gen_field(values->LRIPostSyncOperation, 23, 23) | + __gen_field(values->StoreDataIndex, 21, 21) | + __gen_field(values->CommandStreamerStallEnable, 20, 20) | + __gen_field(values->GlobalSnapshotCountReset, 19, 19) | + __gen_field(values->TLBInvalidate, 18, 18) | + __gen_field(values->GenericMediaStateClear, 16, 16) | + __gen_field(values->PostSyncOperation, 14, 15) | + __gen_field(values->DepthStallEnable, 13, 13) | + __gen_field(values->RenderTargetCacheFlushEnable, 12, 12) | + __gen_field(values->InstructionCacheInvalidateEnable, 11, 11) | + __gen_field(values->TextureCacheInvalidationEnable, 10, 10) | + __gen_field(values->IndirectStatePointersDisable, 9, 9) | + __gen_field(values->NotifyEnable, 8, 8) | + __gen_field(values->PipeControlFlushEnable, 7, 7) | + 
__gen_field(values->DCFlushEnable, 5, 5) | + __gen_field(values->VFCacheInvalidationEnable, 4, 4) | + __gen_field(values->ConstantCacheInvalidationEnable, 3, 3) | + __gen_field(values->StateCacheInvalidationEnable, 2, 2) | + __gen_field(values->StallAtPixelScoreboard, 1, 1) | + __gen_field(values->DepthCacheFlushEnable, 0, 0) | + 0; + + uint32_t dw2 = + 0; + + uint64_t qw2 = + __gen_combine_address(data, &dw[2], values->Address, dw2); + + dw[2] = qw2; + dw[3] = qw2 >> 32; + + uint64_t qw4 = + __gen_field(values->ImmediateData, 0, 63) | + 0; + + dw[4] = qw4; + dw[5] = qw4 >> 32; + +} + +#define GEN8_SCISSOR_RECT_length 0x00000002 + +struct GEN8_SCISSOR_RECT { + uint32_t ScissorRectangleYMin; + uint32_t ScissorRectangleXMin; + uint32_t ScissorRectangleYMax; + uint32_t ScissorRectangleXMax; +}; + +static inline void +GEN8_SCISSOR_RECT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SCISSOR_RECT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->ScissorRectangleYMin, 16, 31) | + __gen_field(values->ScissorRectangleXMin, 0, 15) | + 0; + + dw[1] = + __gen_field(values->ScissorRectangleYMax, 16, 31) | + __gen_field(values->ScissorRectangleXMax, 0, 15) | + 0; + +} + +#define GEN8_SF_CLIP_VIEWPORT_length 0x00000010 + +struct GEN8_SF_CLIP_VIEWPORT { + float ViewportMatrixElementm00; + float ViewportMatrixElementm11; + float ViewportMatrixElementm22; + float ViewportMatrixElementm30; + float ViewportMatrixElementm31; + float ViewportMatrixElementm32; + float XMinClipGuardband; + float XMaxClipGuardband; + float YMinClipGuardband; + float YMaxClipGuardband; + float XMinViewPort; + float XMaxViewPort; + float YMinViewPort; + float YMaxViewPort; +}; + +static inline void +GEN8_SF_CLIP_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_SF_CLIP_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->ViewportMatrixElementm00) | + 0; + 
+ dw[1] = + __gen_float(values->ViewportMatrixElementm11) | + 0; + + dw[2] = + __gen_float(values->ViewportMatrixElementm22) | + 0; + + dw[3] = + __gen_float(values->ViewportMatrixElementm30) | + 0; + + dw[4] = + __gen_float(values->ViewportMatrixElementm31) | + 0; + + dw[5] = + __gen_float(values->ViewportMatrixElementm32) | + 0; + + dw[6] = + 0; + + dw[7] = + 0; + + dw[8] = + __gen_float(values->XMinClipGuardband) | + 0; + + dw[9] = + __gen_float(values->XMaxClipGuardband) | + 0; + + dw[10] = + __gen_float(values->YMinClipGuardband) | + 0; + + dw[11] = + __gen_float(values->YMaxClipGuardband) | + 0; + + dw[12] = + __gen_float(values->XMinViewPort) | + 0; + + dw[13] = + __gen_float(values->XMaxViewPort) | + 0; + + dw[14] = + __gen_float(values->YMinViewPort) | + 0; + + dw[15] = + __gen_float(values->YMaxViewPort) | + 0; + +} + +#define GEN8_BLEND_STATE_length 0x00000011 + +#define GEN8_BLEND_STATE_ENTRY_length 0x00000002 + +struct GEN8_BLEND_STATE_ENTRY { + bool LogicOpEnable; + uint32_t LogicOpFunction; + uint32_t PreBlendSourceOnlyClampEnable; +#define COLORCLAMP_UNORM 0 +#define COLORCLAMP_SNORM 1 +#define COLORCLAMP_RTFORMAT 2 + uint32_t ColorClampRange; + bool PreBlendColorClampEnable; + bool PostBlendColorClampEnable; + bool ColorBufferBlendEnable; + uint32_t SourceBlendFactor; + uint32_t DestinationBlendFactor; + uint32_t ColorBlendFunction; + uint32_t SourceAlphaBlendFactor; + uint32_t DestinationAlphaBlendFactor; + uint32_t AlphaBlendFunction; + bool WriteDisableAlpha; + bool WriteDisableRed; + bool WriteDisableGreen; + bool WriteDisableBlue; +}; + +static inline void +GEN8_BLEND_STATE_ENTRY_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE_ENTRY * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + uint64_t qw0 = + __gen_field(values->LogicOpEnable, 63, 63) | + __gen_field(values->LogicOpFunction, 59, 62) | + __gen_field(values->PreBlendSourceOnlyClampEnable, 36, 36) | + __gen_field(values->ColorClampRange, 
34, 35) | + __gen_field(values->PreBlendColorClampEnable, 33, 33) | + __gen_field(values->PostBlendColorClampEnable, 32, 32) | + __gen_field(values->ColorBufferBlendEnable, 31, 31) | + __gen_field(values->SourceBlendFactor, 26, 30) | + __gen_field(values->DestinationBlendFactor, 21, 25) | + __gen_field(values->ColorBlendFunction, 18, 20) | + __gen_field(values->SourceAlphaBlendFactor, 13, 17) | + __gen_field(values->DestinationAlphaBlendFactor, 8, 12) | + __gen_field(values->AlphaBlendFunction, 5, 7) | + __gen_field(values->WriteDisableAlpha, 3, 3) | + __gen_field(values->WriteDisableRed, 2, 2) | + __gen_field(values->WriteDisableGreen, 1, 1) | + __gen_field(values->WriteDisableBlue, 0, 0) | + 0; + + dw[0] = qw0; + dw[1] = qw0 >> 32; + +} + +struct GEN8_BLEND_STATE { + bool AlphaToCoverageEnable; + bool IndependentAlphaBlendEnable; + bool AlphaToOneEnable; + bool AlphaToCoverageDitherEnable; + bool AlphaTestEnable; + uint32_t AlphaTestFunction; + bool ColorDitherEnable; + uint32_t XDitherOffset; + uint32_t YDitherOffset; + struct GEN8_BLEND_STATE_ENTRY Entry[8]; +}; + +static inline void +GEN8_BLEND_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BLEND_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->AlphaToCoverageEnable, 31, 31) | + __gen_field(values->IndependentAlphaBlendEnable, 30, 30) | + __gen_field(values->AlphaToOneEnable, 29, 29) | + __gen_field(values->AlphaToCoverageDitherEnable, 28, 28) | + __gen_field(values->AlphaTestEnable, 27, 27) | + __gen_field(values->AlphaTestFunction, 24, 26) | + __gen_field(values->ColorDitherEnable, 23, 23) | + __gen_field(values->XDitherOffset, 21, 22) | + __gen_field(values->YDitherOffset, 19, 20) | + 0; + + for (uint32_t i = 0, j = 1; i < 8; i++, j += 2) + GEN8_BLEND_STATE_ENTRY_pack(data, &dw[j], &values->Entry[i]); +} + +#define GEN8_CC_VIEWPORT_length 0x00000002 + +struct GEN8_CC_VIEWPORT { + float MinimumDepth; + float MaximumDepth; 
+}; + +static inline void +GEN8_CC_VIEWPORT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_CC_VIEWPORT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_float(values->MinimumDepth) | + 0; + + dw[1] = + __gen_float(values->MaximumDepth) | + 0; + +} + +#define GEN8_COLOR_CALC_STATE_length 0x00000006 + +struct GEN8_COLOR_CALC_STATE { + uint32_t StencilReferenceValue; + uint32_t BackFaceStencilReferenceValue; +#define Cancelled 0 +#define NotCancelled 1 + uint32_t RoundDisableFunctionDisable; +#define ALPHATEST_UNORM8 0 +#define ALPHATEST_FLOAT32 1 + uint32_t AlphaTestFormat; + uint32_t AlphaReferenceValueAsUNORM8; + float AlphaReferenceValueAsFLOAT32; + float BlendConstantColorRed; + float BlendConstantColorGreen; + float BlendConstantColorBlue; + float BlendConstantColorAlpha; +}; + +static inline void +GEN8_COLOR_CALC_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_COLOR_CALC_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->StencilReferenceValue, 24, 31) | + __gen_field(values->BackFaceStencilReferenceValue, 16, 23) | + __gen_field(values->RoundDisableFunctionDisable, 15, 15) | + __gen_field(values->AlphaTestFormat, 0, 0) | + 0; + + dw[1] = + __gen_field(values->AlphaReferenceValueAsUNORM8, 0, 31) | + __gen_float(values->AlphaReferenceValueAsFLOAT32) | + 0; + + dw[2] = + __gen_float(values->BlendConstantColorRed) | + 0; + + dw[3] = + __gen_float(values->BlendConstantColorGreen) | + 0; + + dw[4] = + __gen_float(values->BlendConstantColorBlue) | + 0; + + dw[5] = + __gen_float(values->BlendConstantColorAlpha) | + 0; + +} + +#define GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_length 0x00000002 + +struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 { + uint32_t BlackPointOffsetR; + uint32_t BlackPointOffsetG; + uint32_t BlackPointOffsetB; +}; + +static inline void +GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576_pack(__gen_user_data *data, 
void * restrict dst, + const struct GEN8_BLACK_LEVEL_CORRECTION_STATE__DW7576 * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->BlackPointOffsetR, 0, 12) | + 0; + + dw[1] = + __gen_field(values->BlackPointOffsetG, 13, 25) | + __gen_field(values->BlackPointOffsetB, 0, 12) | + 0; + +} + +#define GEN8_INTERFACE_DESCRIPTOR_DATA_length 0x00000008 + +struct GEN8_INTERFACE_DESCRIPTOR_DATA { + uint32_t KernelStartPointer; + uint32_t KernelStartPointerHigh; +#define Ftz 0 +#define SetByKernel 1 + uint32_t DenormMode; +#define Multiple 0 +#define Single 1 + uint32_t SingleProgramFlow; +#define NormalPriority 0 +#define HighPriority 1 + uint32_t ThreadPriority; +#define IEEE754 0 +#define Alternate 1 + uint32_t FloatingPointMode; + bool IllegalOpcodeExceptionEnable; + bool MaskStackExceptionEnable; + bool SoftwareExceptionEnable; + uint32_t SamplerStatePointer; +#define Nosamplersused 0 +#define Between1and4samplersused 1 +#define Between5and8samplersused 2 +#define Between9and12samplersused 3 +#define Between13and16samplersused 4 + uint32_t SamplerCount; + uint32_t BindingTablePointer; + uint32_t BindingTableEntryCount; + uint32_t ConstantIndirectURBEntryReadLength; + uint32_t ConstantURBEntryReadOffset; +#define RTNE 0 +#define RU 1 +#define RD 2 +#define RTZ 3 + uint32_t RoundingMode; + bool BarrierEnable; +#define Encodes0k 0 +#define Encodes4k 1 +#define Encodes8k 2 +#define Encodes16k 4 +#define Encodes32k 8 +#define Encodes64k 16 + uint32_t SharedLocalMemorySize; + uint32_t NumberofThreadsinGPGPUThreadGroup; + uint32_t CrossThreadConstantDataReadLength; +}; + +static inline void +GEN8_INTERFACE_DESCRIPTOR_DATA_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_INTERFACE_DESCRIPTOR_DATA * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->KernelStartPointer, 6, 31) | + 0; + + dw[1] = + __gen_offset(values->KernelStartPointerHigh, 0, 15) | + 0; + + dw[2] = 
+ __gen_field(values->DenormMode, 19, 19) | + __gen_field(values->SingleProgramFlow, 18, 18) | + __gen_field(values->ThreadPriority, 17, 17) | + __gen_field(values->FloatingPointMode, 16, 16) | + __gen_field(values->IllegalOpcodeExceptionEnable, 13, 13) | + __gen_field(values->MaskStackExceptionEnable, 11, 11) | + __gen_field(values->SoftwareExceptionEnable, 7, 7) | + 0; + + dw[3] = + __gen_offset(values->SamplerStatePointer, 5, 31) | + __gen_field(values->SamplerCount, 2, 4) | + 0; + + dw[4] = + __gen_offset(values->BindingTablePointer, 5, 15) | + __gen_field(values->BindingTableEntryCount, 0, 4) | + 0; + + dw[5] = + __gen_field(values->ConstantIndirectURBEntryReadLength, 16, 31) | + __gen_field(values->ConstantURBEntryReadOffset, 0, 15) | + 0; + + dw[6] = + __gen_field(values->RoundingMode, 22, 23) | + __gen_field(values->BarrierEnable, 21, 21) | + __gen_field(values->SharedLocalMemorySize, 16, 20) | + __gen_field(values->NumberofThreadsinGPGPUThreadGroup, 0, 9) | + 0; + + dw[7] = + __gen_field(values->CrossThreadConstantDataReadLength, 0, 7) | + 0; + +} + +#define GEN8_BINDING_TABLE_STATE_length 0x00000001 + +struct GEN8_BINDING_TABLE_STATE { + uint32_t SurfaceStatePointer; +}; + +static inline void +GEN8_BINDING_TABLE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_BINDING_TABLE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_offset(values->SurfaceStatePointer, 6, 31) | + 0; + +} + +#define GEN8_RENDER_SURFACE_STATE_length 0x00000010 + +struct GEN8_RENDER_SURFACE_STATE { +#define SURFTYPE_1D 0 +#define SURFTYPE_2D 1 +#define SURFTYPE_3D 2 +#define SURFTYPE_CUBE 3 +#define SURFTYPE_BUFFER 4 +#define SURFTYPE_STRBUF 5 +#define SURFTYPE_NULL 7 + uint32_t SurfaceType; + bool SurfaceArray; + uint32_t SurfaceFormat; +#define VALIGN4 1 +#define VALIGN8 2 +#define VALIGN16 3 + uint32_t SurfaceVerticalAlignment; +#define HALIGN4 1 +#define HALIGN8 2 +#define HALIGN16 3 + uint32_t 
SurfaceHorizontalAlignment; +#define LINEAR 0 +#define WMAJOR 1 +#define XMAJOR 2 +#define YMAJOR 3 + uint32_t TileMode; + uint32_t VerticalLineStride; + uint32_t VerticalLineStrideOffset; + bool SamplerL2BypassModeDisable; +#define WriteOnlyCache 0 +#define ReadWriteCache 1 + uint32_t RenderCacheReadWriteMode; +#define NORMAL_MODE 0 +#define PROGRESSIVE_FRAME 2 +#define INTERLACED_FRAME 3 + uint32_t MediaBoundaryPixelMode; + bool CubeFaceEnablePositiveZ; + bool CubeFaceEnableNegativeZ; + bool CubeFaceEnablePositiveY; + bool CubeFaceEnableNegativeY; + bool CubeFaceEnablePositiveX; + bool CubeFaceEnableNegativeX; + struct GEN8_MEMORY_OBJECT_CONTROL_STATE MemoryObjectControlState; + float BaseMipLevel; + uint32_t SurfaceQPitch; + uint32_t Height; + uint32_t Width; + uint32_t Depth; + uint32_t SurfacePitch; +#define _0DEG 0 +#define _90DEG 1 +#define _270DEG 3 + uint32_t RenderTargetAndSampleUnormRotation; + uint32_t MinimumArrayElement; + uint32_t RenderTargetViewExtent; +#define MSS 0 +#define DEPTH_STENCIL 1 + uint32_t MultisampledSurfaceStorageFormat; +#define MULTISAMPLECOUNT_1 0 +#define MULTISAMPLECOUNT_2 1 +#define MULTISAMPLECOUNT_4 2 +#define MULTISAMPLECOUNT_8 3 + uint32_t NumberofMultisamples; + uint32_t MultisamplePositionPaletteIndex; + uint32_t XOffset; + uint32_t YOffset; + bool EWADisableForCube; +#define GPUcoherent 0 +#define IAcoherent 1 + uint32_t CoherencyType; + uint32_t SurfaceMinLOD; + uint32_t MIPCountLOD; + uint32_t AuxiliarySurfaceQPitch; + uint32_t AuxiliarySurfacePitch; +#define AUX_NONE 0 +#define AUX_MCS 1 +#define AUX_APPEND 2 +#define AUX_HIZ 3 + uint32_t AuxiliarySurfaceMode; + bool SeparateUVPlaneEnable; + uint32_t XOffsetforUorUVPlane; + uint32_t YOffsetforUorUVPlane; + uint32_t RedClearColor; + uint32_t GreenClearColor; + uint32_t BlueClearColor; + uint32_t AlphaClearColor; + uint32_t ShaderChannelSelectRed; + uint32_t ShaderChannelSelectGreen; + uint32_t ShaderChannelSelectBlue; + uint32_t ShaderChannelSelectAlpha; + float 
ResourceMinLOD; + __gen_address_type SurfaceBaseAddress; + uint32_t XOffsetforVPlane; + uint32_t YOffsetforVPlane; + uint32_t AuxiliaryTableIndexforMediaCompressedSurface; + __gen_address_type AuxiliarySurfaceBaseAddress; +}; + +static inline void +GEN8_RENDER_SURFACE_STATE_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_RENDER_SURFACE_STATE * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->SurfaceType, 29, 31) | + __gen_field(values->SurfaceArray, 28, 28) | + __gen_field(values->SurfaceFormat, 18, 26) | + __gen_field(values->SurfaceVerticalAlignment, 16, 17) | + __gen_field(values->SurfaceHorizontalAlignment, 14, 15) | + __gen_field(values->TileMode, 12, 13) | + __gen_field(values->VerticalLineStride, 11, 11) | + __gen_field(values->VerticalLineStrideOffset, 10, 10) | + __gen_field(values->SamplerL2BypassModeDisable, 9, 9) | + __gen_field(values->RenderCacheReadWriteMode, 8, 8) | + __gen_field(values->MediaBoundaryPixelMode, 6, 7) | + __gen_field(values->CubeFaceEnablePositiveZ, 0, 0) | + __gen_field(values->CubeFaceEnableNegativeZ, 1, 1) | + __gen_field(values->CubeFaceEnablePositiveY, 2, 2) | + __gen_field(values->CubeFaceEnableNegativeY, 3, 3) | + __gen_field(values->CubeFaceEnablePositiveX, 4, 4) | + __gen_field(values->CubeFaceEnableNegativeX, 5, 5) | + 0; + + uint32_t dw_MemoryObjectControlState; + GEN8_MEMORY_OBJECT_CONTROL_STATE_pack(data, &dw_MemoryObjectControlState, &values->MemoryObjectControlState); + dw[1] = + __gen_field(dw_MemoryObjectControlState, 24, 30) | + __gen_field(values->BaseMipLevel * (1 << 1), 19, 23) | + __gen_field(values->SurfaceQPitch, 0, 14) | + 0; + + dw[2] = + __gen_field(values->Height, 16, 29) | + __gen_field(values->Width, 0, 13) | + 0; + + dw[3] = + __gen_field(values->Depth, 21, 31) | + __gen_field(values->SurfacePitch, 0, 17) | + 0; + + dw[4] = + __gen_field(values->RenderTargetAndSampleUnormRotation, 29, 30) | + __gen_field(values->MinimumArrayElement, 
18, 28) | + __gen_field(values->RenderTargetViewExtent, 7, 17) | + __gen_field(values->MultisampledSurfaceStorageFormat, 6, 6) | + __gen_field(values->NumberofMultisamples, 3, 5) | + __gen_field(values->MultisamplePositionPaletteIndex, 0, 2) | + 0; + + dw[5] = + __gen_offset(values->XOffset, 25, 31) | + __gen_offset(values->YOffset, 21, 23) | + __gen_field(values->EWADisableForCube, 20, 20) | + __gen_field(values->CoherencyType, 14, 14) | + __gen_field(values->SurfaceMinLOD, 4, 7) | + __gen_field(values->MIPCountLOD, 0, 3) | + 0; + + dw[6] = + __gen_field(values->AuxiliarySurfaceQPitch, 16, 30) | + __gen_field(values->AuxiliarySurfacePitch, 3, 11) | + __gen_field(values->AuxiliarySurfaceMode, 0, 2) | + __gen_field(values->SeparateUVPlaneEnable, 31, 31) | + __gen_field(values->XOffsetforUorUVPlane, 16, 29) | + __gen_field(values->YOffsetforUorUVPlane, 0, 13) | + 0; + + dw[7] = + __gen_field(values->RedClearColor, 31, 31) | + __gen_field(values->GreenClearColor, 30, 30) | + __gen_field(values->BlueClearColor, 29, 29) | + __gen_field(values->AlphaClearColor, 28, 28) | + __gen_field(values->ShaderChannelSelectRed, 25, 27) | + __gen_field(values->ShaderChannelSelectGreen, 22, 24) | + __gen_field(values->ShaderChannelSelectBlue, 19, 21) | + __gen_field(values->ShaderChannelSelectAlpha, 16, 18) | + __gen_field(values->ResourceMinLOD * (1 << 8), 0, 11) | + 0; + + uint32_t dw8 = + 0; + + uint64_t qw8 = + __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, dw8); + + dw[8] = qw8; + dw[9] = qw8 >> 32; + + uint32_t dw10 = + __gen_field(values->XOffsetforVPlane, 48, 61) | + __gen_field(values->YOffsetforVPlane, 32, 45) | + __gen_field(values->AuxiliaryTableIndexforMediaCompressedSurface, 21, 31) | + 0; + + uint64_t qw10 = + __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, dw10); + + dw[10] = qw10; + dw[11] = qw10 >> 32; + + dw[12] = + 0; + + dw[13] = + 0; + + dw[14] = + 0; + + dw[15] = + 0; + +} + +#define GEN8_FILTER_COEFFICIENT_length 
0x00000001 + +struct GEN8_FILTER_COEFFICIENT { + uint32_t FilterCoefficient; +}; + +static inline void +GEN8_FILTER_COEFFICIENT_pack(__gen_user_data *data, void * restrict dst, + const struct GEN8_FILTER_COEFFICIENT * restrict values) +{ + uint32_t *dw = (uint32_t * restrict) dst; + + dw[0] = + __gen_field(values->FilterCoefficient, 0, 7) | + 0; + +} + +#define GEN8_SAMPLER_STATE_length 0x00000004 + +struct GEN8_SAMPLER_STATE { + bool SamplerDisable; +#define DX10OGL 0 +#define DX9 1 + uint32_t TextureBorderColorMode; +#define CLAMP_NONE 0 +#define CLAMP_OGL 2 + uint32_t LODPreClampMode; + float BaseMipLevel; +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + uint32_t MipModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MagModeFilter; +#define MAPFILTER_NEAREST 0 +#define MAPFILTER_LINEAR 1 +#define MAPFILTER_ANISOTROPIC 2 +#define MAPFILTER_MONO 6 + uint32_t MinModeFilter; + uint32_t TextureLODBias; +#define LEGACY 0 +#define EWAApproximation 1 + uint32_t AnisotropicAlgorithm; + float MinLOD; + float MaxLOD; + bool ChromaKeyEnable; + uint32_t ChromaKeyIndex; +#define KEYFILTER_KILL_ON_ANY_MATCH 0 +#define KEYFILTER_REPLACE_BLACK 1 + uint32_t ChromaKeyMode; +#define PREFILTEROPALWAYS 0 +#define PREFILTEROPNEVER 1 +#define PREFILTEROPLESS 2 +#define PREFILTEROPEQUAL 3 +#define PREFILTEROPLEQUAL 4 +#define PREFILTEROPGREATER 5 +#define PREFILTEROPNOTEQUAL 6 +#define PREFILTEROPGEQUAL 7 + uint32_t ShadowFunction; +#define PROGRAMMED 0 +#define OVERRIDE 1 + uint32_t CubeSurfaceControlMode; + uint32_t IndirectStatePointer; +#define MIPNONE 0 +#define MIPFILTER 1 + uint32_t LODClampMagnificationMode; +#define RATIO21 0 +#define RATIO41 1 +#define RATIO61 2 +#define RATIO81 3 +#define RATIO101 4 +#define RATIO121 5 +#define RATIO141 6 +#define RATIO161 7 + uint32_t MaximumAnisotropy; + bool RAddressMinFilterRoundingEnable; + bool 
RAddressMagFilterRoundingEnable;
   bool VAddressMinFilterRoundingEnable;
   bool VAddressMagFilterRoundingEnable;
   bool UAddressMinFilterRoundingEnable;
   bool UAddressMagFilterRoundingEnable;
#define FULL 0
#define HIGH 1
#define MED 2
#define LOW 3
   uint32_t TrilinearFilterQuality;
   bool NonnormalizedCoordinateEnable;
   uint32_t TCXAddressControlMode;
   uint32_t TCYAddressControlMode;
   uint32_t TCZAddressControlMode;
};

/* Pack a GEN8_SAMPLER_STATE description into four 32-bit words at dst.
 *
 * data   - not referenced by this function.
 * dst    - destination buffer; dw[0] through dw[3] are written.
 * values - unpacked field values to encode.
 *
 * Every field goes through __gen_field() together with two integers that
 * look like a bit range -- presumably (first bit, last bit); confirm against
 * the __gen_field definition.  The fields of one dword are OR-ed together.
 */
static inline void
GEN8_SAMPLER_STATE_pack(__gen_user_data *data, void * restrict dst,
                        const struct GEN8_SAMPLER_STATE * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->SamplerDisable, 31, 31) |
      __gen_field(values->TextureBorderColorMode, 29, 29) |
      __gen_field(values->LODPreClampMode, 27, 28) |
      /* Scaled by 2 before packing -- presumably one fractional bit. */
      __gen_field(values->BaseMipLevel * (1 << 1), 22, 26) |
      __gen_field(values->MipModeFilter, 20, 21) |
      __gen_field(values->MagModeFilter, 17, 19) |
      __gen_field(values->MinModeFilter, 14, 16) |
      __gen_field(values->TextureLODBias, 1, 13) |
      __gen_field(values->AnisotropicAlgorithm, 0, 0) |
      0;

   dw[1] =
      /* Both LOD limits are scaled by 256 -- presumably eight fractional
       * bits; TODO confirm against the hardware documentation.
       */
      __gen_field(values->MinLOD * (1 << 8), 20, 31) |
      __gen_field(values->MaxLOD * (1 << 8), 8, 19) |
      __gen_field(values->ChromaKeyEnable, 7, 7) |
      __gen_field(values->ChromaKeyIndex, 5, 6) |
      __gen_field(values->ChromaKeyMode, 4, 4) |
      __gen_field(values->ShadowFunction, 1, 3) |
      __gen_field(values->CubeSurfaceControlMode, 0, 0) |
      0;

   dw[2] =
      __gen_field(values->IndirectStatePointer, 6, 23) |
      __gen_field(values->LODClampMagnificationMode, 0, 0) |
      0;

   dw[3] =
      __gen_field(values->MaximumAnisotropy, 19, 21) |
      __gen_field(values->RAddressMinFilterRoundingEnable, 13, 13) |
      __gen_field(values->RAddressMagFilterRoundingEnable, 14, 14) |
      __gen_field(values->VAddressMinFilterRoundingEnable, 15, 15) |
      __gen_field(values->VAddressMagFilterRoundingEnable, 16, 16) |
      __gen_field(values->UAddressMinFilterRoundingEnable, 17, 17) |
      __gen_field(values->UAddressMagFilterRoundingEnable, 18, 18) |
      __gen_field(values->TrilinearFilterQuality, 11, 12) |
      __gen_field(values->NonnormalizedCoordinateEnable, 10, 10) |
      __gen_field(values->TCXAddressControlMode, 6, 8) |
      __gen_field(values->TCYAddressControlMode, 3, 5) |
      __gen_field(values->TCZAddressControlMode, 0, 2) |
      0;

}

#define GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length 0x00000008

/* Unpacked filter coefficients; the pack function below consumes every
 * member and produces GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_length dwords.
 */
struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS {
   uint32_t Table0YFilterCoefficientn1;
   uint32_t Table0XFilterCoefficientn1;
   uint32_t Table0YFilterCoefficientn0;
   uint32_t Table0XFilterCoefficientn0;
   uint32_t Table0YFilterCoefficientn3;
   uint32_t Table0XFilterCoefficientn3;
   uint32_t Table0YFilterCoefficientn2;
   uint32_t Table0XFilterCoefficientn2;
   uint32_t Table0YFilterCoefficientn5;
   uint32_t Table0XFilterCoefficientn5;
   uint32_t Table0YFilterCoefficientn4;
   uint32_t Table0XFilterCoefficientn4;
   uint32_t Table0YFilterCoefficientn7;
   uint32_t Table0XFilterCoefficientn7;
   uint32_t Table0YFilterCoefficientn6;
   uint32_t Table0XFilterCoefficientn6;
   uint32_t Table1XFilterCoefficientn3;
   uint32_t Table1XFilterCoefficientn2;
   uint32_t Table1XFilterCoefficientn5;
   uint32_t Table1XFilterCoefficientn4;
   uint32_t Table1YFilterCoefficientn3;
   uint32_t Table1YFilterCoefficientn2;
   uint32_t Table1YFilterCoefficientn5;
   uint32_t Table1YFilterCoefficientn4;
};

/* Pack the AVS coefficient tables into eight 32-bit words at dst.
 *
 * data   - not referenced by this function.
 * dst    - destination buffer; dw[0] through dw[7] are written.
 * values - unpacked coefficients to encode.
 *
 * dw[0..3] each carry four Table0 coefficients, one per 8-bit position.
 * dw[4] and dw[6] only populate positions 16..31, dw[5] and dw[7] only
 * positions 0..15; the remaining bits of those dwords are written as zero.
 */
static inline void
GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS_pack(__gen_user_data *data, void * restrict dst,
                                             const struct GEN8_SAMPLER_STATE_8X8_AVS_COEFFICIENTS * restrict values)
{
   uint32_t *dw = (uint32_t * restrict) dst;

   dw[0] =
      __gen_field(values->Table0YFilterCoefficientn1, 24, 31) |
      __gen_field(values->Table0XFilterCoefficientn1, 16, 23) |
      __gen_field(values->Table0YFilterCoefficientn0, 8, 15) |
      __gen_field(values->Table0XFilterCoefficientn0, 0, 7) |
      0;

   dw[1] =
      __gen_field(values->Table0YFilterCoefficientn3, 24, 31) |
      __gen_field(values->Table0XFilterCoefficientn3, 16, 23) |
      __gen_field(values->Table0YFilterCoefficientn2, 8, 15) |
      __gen_field(values->Table0XFilterCoefficientn2, 0, 7) |
      0;

   dw[2] =
      __gen_field(values->Table0YFilterCoefficientn5, 24, 31) |
      __gen_field(values->Table0XFilterCoefficientn5, 16, 23) |
      __gen_field(values->Table0YFilterCoefficientn4, 8, 15) |
      __gen_field(values->Table0XFilterCoefficientn4, 0, 7) |
      0;

   dw[3] =
      __gen_field(values->Table0YFilterCoefficientn7, 24, 31) |
      __gen_field(values->Table0XFilterCoefficientn7, 16, 23) |
      __gen_field(values->Table0YFilterCoefficientn6, 8, 15) |
      __gen_field(values->Table0XFilterCoefficientn6, 0, 7) |
      0;

   dw[4] =
      __gen_field(values->Table1XFilterCoefficientn3, 24, 31) |
      __gen_field(values->Table1XFilterCoefficientn2, 16, 23) |
      0;

   dw[5] =
      __gen_field(values->Table1XFilterCoefficientn5, 8, 15) |
      __gen_field(values->Table1XFilterCoefficientn4, 0, 7) |
      0;

   dw[6] =
      __gen_field(values->Table1YFilterCoefficientn3, 24, 31) |
      __gen_field(values->Table1YFilterCoefficientn2, 16, 23) |
      0;

   dw[7] =
      __gen_field(values->Table1YFilterCoefficientn5, 8, 15) |
      __gen_field(values->Table1YFilterCoefficientn4, 0, 7) |
      0;

}

/* Enum 3D_Prim_Topo_Type */
#define _3DPRIM_POINTLIST 1
#define _3DPRIM_LINELIST 2
#define _3DPRIM_LINESTRIP 3
#define _3DPRIM_TRILIST 4
#define _3DPRIM_TRISTRIP 5
#define _3DPRIM_TRIFAN 6
#define _3DPRIM_QUADLIST 7
#define _3DPRIM_QUADSTRIP 8
#define _3DPRIM_LINELIST_ADJ 9
#define _3DPRIM_LINESTRIP_ADJ 10
#define _3DPRIM_TRILIST_ADJ 11
#define _3DPRIM_TRISTRIP_ADJ 12
#define _3DPRIM_TRISTRIP_REVERSE 13
#define _3DPRIM_POLYGON 14
#define _3DPRIM_RECTLIST 15
#define _3DPRIM_LINELOOP 16
#define _3DPRIM_POINTLIST_BF 17
#define _3DPRIM_LINESTRIP_CONT 18
#define _3DPRIM_LINESTRIP_BF 19
#define _3DPRIM_LINESTRIP_CONT_BF 20
#define _3DPRIM_TRIFAN_NOSTIPPLE 22
#define _3DPRIM_PATCHLIST_1 32
#define _3DPRIM_PATCHLIST_2 33
#define _3DPRIM_PATCHLIST_3 34
#define _3DPRIM_PATCHLIST_4 35
#define _3DPRIM_PATCHLIST_5 36
#define _3DPRIM_PATCHLIST_6 37
#define _3DPRIM_PATCHLIST_7 38
#define _3DPRIM_PATCHLIST_8 39
#define _3DPRIM_PATCHLIST_9 40
#define _3DPRIM_PATCHLIST_10 41
#define _3DPRIM_PATCHLIST_11 42
#define _3DPRIM_PATCHLIST_12 43
#define _3DPRIM_PATCHLIST_13 44
#define _3DPRIM_PATCHLIST_14 45
#define _3DPRIM_PATCHLIST_15 46
#define _3DPRIM_PATCHLIST_16 47
#define _3DPRIM_PATCHLIST_17 48
#define _3DPRIM_PATCHLIST_18 49
#define _3DPRIM_PATCHLIST_19 50
#define _3DPRIM_PATCHLIST_20 51
#define _3DPRIM_PATCHLIST_21 52
#define _3DPRIM_PATCHLIST_22 53
#define _3DPRIM_PATCHLIST_23 54
#define _3DPRIM_PATCHLIST_24 55
#define _3DPRIM_PATCHLIST_25 56
#define _3DPRIM_PATCHLIST_26 57
#define _3DPRIM_PATCHLIST_27 58
#define _3DPRIM_PATCHLIST_28 59
#define _3DPRIM_PATCHLIST_29 60
#define _3DPRIM_PATCHLIST_30 61
#define _3DPRIM_PATCHLIST_31 62
#define _3DPRIM_PATCHLIST_32 63

/* Enum 3D_Vertex_Component_Control */
#define VFCOMP_NOSTORE 0
#define VFCOMP_STORE_SRC 1
#define VFCOMP_STORE_0 2
#define VFCOMP_STORE_1_FP 3
#define VFCOMP_STORE_1_INT 4
#define VFCOMP_STORE_PID 7

/* Enum WRAP_SHORTEST_ENABLE */
#define WSE_X 1
#define WSE_Y 2
#define WSE_XY 3
#define WSE_Z 4
#define WSE_XZ 5
#define WSE_YZ 6
#define WSE_XYZ 7
#define WSE_W 8
#define WSE_XW 9
#define WSE_YW 10
#define WSE_XYW 11
#define WSE_ZW 12
#define WSE_XZW 13
#define WSE_YZW 14
#define WSE_XYZW 15

/* Enum 3D_Stencil_Operation */
#define STENCILOP_KEEP 0
#define STENCILOP_ZERO 1
#define STENCILOP_REPLACE 2
#define STENCILOP_INCRSAT 3
#define STENCILOP_DECRSAT 4
#define STENCILOP_INCR 5
#define STENCILOP_DECR 6
#define STENCILOP_INVERT 7

/* Enum 3D_Color_Buffer_Blend_Factor */
#define BLENDFACTOR_ONE 1
#define BLENDFACTOR_SRC_COLOR 2
#define BLENDFACTOR_SRC_ALPHA 3
#define BLENDFACTOR_DST_ALPHA 4
#define BLENDFACTOR_DST_COLOR 5
#define BLENDFACTOR_SRC_ALPHA_SATURATE 6
+#define BLENDFACTOR_CONST_COLOR 7 +#define BLENDFACTOR_CONST_ALPHA 8 +#define BLENDFACTOR_SRC1_COLOR 9 +#define BLENDFACTOR_SRC1_ALPHA 10 +#define BLENDFACTOR_ZERO 17 +#define BLENDFACTOR_INV_SRC_COLOR 18 +#define BLENDFACTOR_INV_SRC_ALPHA 19 +#define BLENDFACTOR_INV_DST_ALPHA 20 +#define BLENDFACTOR_INV_DST_COLOR 21 +#define BLENDFACTOR_INV_CONST_COLOR 23 +#define BLENDFACTOR_INV_CONST_ALPHA 24 +#define BLENDFACTOR_INV_SRC1_COLOR 25 +#define BLENDFACTOR_INV_SRC1_ALPHA 26 + +/* Enum 3D_Color_Buffer_Blend_Function */ +#define BLENDFUNCTION_ADD 0 +#define BLENDFUNCTION_SUBTRACT 1 +#define BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BLENDFUNCTION_MIN 3 +#define BLENDFUNCTION_MAX 4 + +/* Enum 3D_Compare_Function */ +#define COMPAREFUNCTION_ALWAYS 0 +#define COMPAREFUNCTION_NEVER 1 +#define COMPAREFUNCTION_LESS 2 +#define COMPAREFUNCTION_EQUAL 3 +#define COMPAREFUNCTION_LEQUAL 4 +#define COMPAREFUNCTION_GREATER 5 +#define COMPAREFUNCTION_NOTEQUAL 6 +#define COMPAREFUNCTION_GEQUAL 7 + +/* Enum 3D_Logic_Op_Function */ +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 1 +#define LOGICOP_AND_INVERTED 2 +#define LOGICOP_COPY_INVERTED 3 +#define LOGICOP_AND_REVERSE 4 +#define LOGICOP_INVERT 5 +#define LOGICOP_XOR 6 +#define LOGICOP_NAND 7 +#define LOGICOP_AND 8 +#define LOGICOP_EQUIV 9 +#define LOGICOP_NOOP 10 +#define LOGICOP_OR_INVERTED 11 +#define LOGICOP_COPY 12 +#define LOGICOP_OR_REVERSE 13 +#define LOGICOP_OR 14 +#define LOGICOP_SET 15 + +/* Enum SURFACE_FORMAT */ +#define R32G32B32A32_FLOAT 0 +#define R32G32B32A32_SINT 1 +#define R32G32B32A32_UINT 2 +#define R32G32B32A32_UNORM 3 +#define R32G32B32A32_SNORM 4 +#define R64G64_FLOAT 5 +#define R32G32B32X32_FLOAT 6 +#define R32G32B32A32_SSCALED 7 +#define R32G32B32A32_USCALED 8 +#define R32G32B32A32_SFIXED 32 +#define R64G64_PASSTHRU 33 +#define R32G32B32_FLOAT 64 +#define R32G32B32_SINT 65 +#define R32G32B32_UINT 66 +#define R32G32B32_UNORM 67 +#define R32G32B32_SNORM 68 +#define R32G32B32_SSCALED 69 +#define 
R32G32B32_USCALED 70 +#define R32G32B32_SFIXED 80 +#define R16G16B16A16_UNORM 128 +#define R16G16B16A16_SNORM 129 +#define R16G16B16A16_SINT 130 +#define R16G16B16A16_UINT 131 +#define R16G16B16A16_FLOAT 132 +#define R32G32_FLOAT 133 +#define R32G32_SINT 134 +#define R32G32_UINT 135 +#define R32_FLOAT_X8X24_TYPELESS 136 +#define X32_TYPELESS_G8X24_UINT 137 +#define L32A32_FLOAT 138 +#define R32G32_UNORM 139 +#define R32G32_SNORM 140 +#define R64_FLOAT 141 +#define R16G16B16X16_UNORM 142 +#define R16G16B16X16_FLOAT 143 +#define A32X32_FLOAT 144 +#define L32X32_FLOAT 145 +#define I32X32_FLOAT 146 +#define R16G16B16A16_SSCALED 147 +#define R16G16B16A16_USCALED 148 +#define R32G32_SSCALED 149 +#define R32G32_USCALED 150 +#define R32G32_SFIXED 160 +#define R64_PASSTHRU 161 +#define B8G8R8A8_UNORM 192 +#define B8G8R8A8_UNORM_SRGB 193 +#define R10G10B10A2_UNORM 194 +#define R10G10B10A2_UNORM_SRGB 195 +#define R10G10B10A2_UINT 196 +#define R10G10B10_SNORM_A2_UNORM 197 +#define R8G8B8A8_UNORM 199 +#define R8G8B8A8_UNORM_SRGB 200 +#define R8G8B8A8_SNORM 201 +#define R8G8B8A8_SINT 202 +#define R8G8B8A8_UINT 203 +#define R16G16_UNORM 204 +#define R16G16_SNORM 205 +#define R16G16_SINT 206 +#define R16G16_UINT 207 +#define R16G16_FLOAT 208 +#define B10G10R10A2_UNORM 209 +#define B10G10R10A2_UNORM_SRGB 210 +#define R11G11B10_FLOAT 211 +#define R32_SINT 214 +#define R32_UINT 215 +#define R32_FLOAT 216 +#define R24_UNORM_X8_TYPELESS 217 +#define X24_TYPELESS_G8_UINT 218 +#define L32_UNORM 221 +#define A32_UNORM 222 +#define L16A16_UNORM 223 +#define I24X8_UNORM 224 +#define L24X8_UNORM 225 +#define A24X8_UNORM 226 +#define I32_FLOAT 227 +#define L32_FLOAT 228 +#define A32_FLOAT 229 +#define X8B8_UNORM_G8R8_SNORM 230 +#define A8X8_UNORM_G8R8_SNORM 231 +#define B8X8_UNORM_G8R8_SNORM 232 +#define B8G8R8X8_UNORM 233 +#define B8G8R8X8_UNORM_SRGB 234 +#define R8G8B8X8_UNORM 235 +#define R8G8B8X8_UNORM_SRGB 236 +#define R9G9B9E5_SHAREDEXP 237 +#define B10G10R10X2_UNORM 238 +#define 
L16A16_FLOAT 240 +#define R32_UNORM 241 +#define R32_SNORM 242 +#define R10G10B10X2_USCALED 243 +#define R8G8B8A8_SSCALED 244 +#define R8G8B8A8_USCALED 245 +#define R16G16_SSCALED 246 +#define R16G16_USCALED 247 +#define R32_SSCALED 248 +#define R32_USCALED 249 +#define B5G6R5_UNORM 256 +#define B5G6R5_UNORM_SRGB 257 +#define B5G5R5A1_UNORM 258 +#define B5G5R5A1_UNORM_SRGB 259 +#define B4G4R4A4_UNORM 260 +#define B4G4R4A4_UNORM_SRGB 261 +#define R8G8_UNORM 262 +#define R8G8_SNORM 263 +#define R8G8_SINT 264 +#define R8G8_UINT 265 +#define R16_UNORM 266 +#define R16_SNORM 267 +#define R16_SINT 268 +#define R16_UINT 269 +#define R16_FLOAT 270 +#define A8P8_UNORM_PALETTE0 271 +#define A8P8_UNORM_PALETTE1 272 +#define I16_UNORM 273 +#define L16_UNORM 274 +#define A16_UNORM 275 +#define L8A8_UNORM 276 +#define I16_FLOAT 277 +#define L16_FLOAT 278 +#define A16_FLOAT 279 +#define L8A8_UNORM_SRGB 280 +#define R5G5_SNORM_B6_UNORM 281 +#define B5G5R5X1_UNORM 282 +#define B5G5R5X1_UNORM_SRGB 283 +#define R8G8_SSCALED 284 +#define R8G8_USCALED 285 +#define R16_SSCALED 286 +#define R16_USCALED 287 +#define P8A8_UNORM_PALETTE0 290 +#define P8A8_UNORM_PALETTE1 291 +#define A1B5G5R5_UNORM 292 +#define A4B4G4R4_UNORM 293 +#define L8A8_UINT 294 +#define L8A8_SINT 295 +#define R8_UNORM 320 +#define R8_SNORM 321 +#define R8_SINT 322 +#define R8_UINT 323 +#define A8_UNORM 324 +#define I8_UNORM 325 +#define L8_UNORM 326 +#define P4A4_UNORM_PALETTE0 327 +#define A4P4_UNORM_PALETTE0 328 +#define R8_SSCALED 329 +#define R8_USCALED 330 +#define P8_UNORM_PALETTE0 331 +#define L8_UNORM_SRGB 332 +#define P8_UNORM_PALETTE1 333 +#define P4A4_UNORM_PALETTE1 334 +#define A4P4_UNORM_PALETTE1 335 +#define Y8_UNORM 336 +#define L8_UINT 338 +#define L8_SINT 339 +#define I8_UINT 340 +#define I8_SINT 341 +#define DXT1_RGB_SRGB 384 +#define R1_UNORM 385 +#define YCRCB_NORMAL 386 +#define YCRCB_SWAPUVY 387 +#define P2_UNORM_PALETTE0 388 +#define P2_UNORM_PALETTE1 389 +#define BC1_UNORM 390 +#define 
BC2_UNORM 391 +#define BC3_UNORM 392 +#define BC4_UNORM 393 +#define BC5_UNORM 394 +#define BC1_UNORM_SRGB 395 +#define BC2_UNORM_SRGB 396 +#define BC3_UNORM_SRGB 397 +#define MONO8 398 +#define YCRCB_SWAPUV 399 +#define YCRCB_SWAPY 400 +#define DXT1_RGB 401 +#define FXT1 402 +#define R8G8B8_UNORM 403 +#define R8G8B8_SNORM 404 +#define R8G8B8_SSCALED 405 +#define R8G8B8_USCALED 406 +#define R64G64B64A64_FLOAT 407 +#define R64G64B64_FLOAT 408 +#define BC4_SNORM 409 +#define BC5_SNORM 410 +#define R16G16B16_FLOAT 411 +#define R16G16B16_UNORM 412 +#define R16G16B16_SNORM 413 +#define R16G16B16_SSCALED 414 +#define R16G16B16_USCALED 415 +#define BC6H_SF16 417 +#define BC7_UNORM 418 +#define BC7_UNORM_SRGB 419 +#define BC6H_UF16 420 +#define PLANAR_420_8 421 +#define R8G8B8_UNORM_SRGB 424 +#define ETC1_RGB8 425 +#define ETC2_RGB8 426 +#define EAC_R11 427 +#define EAC_RG11 428 +#define EAC_SIGNED_R11 429 +#define EAC_SIGNED_RG11 430 +#define ETC2_SRGB8 431 +#define R16G16B16_UINT 432 +#define R16G16B16_SINT 433 +#define R32_SFIXED 434 +#define R10G10B10A2_SNORM 435 +#define R10G10B10A2_USCALED 436 +#define R10G10B10A2_SSCALED 437 +#define R10G10B10A2_SINT 438 +#define B10G10R10A2_SNORM 439 +#define B10G10R10A2_USCALED 440 +#define B10G10R10A2_SSCALED 441 +#define B10G10R10A2_UINT 442 +#define B10G10R10A2_SINT 443 +#define R64G64B64A64_PASSTHRU 444 +#define R64G64B64_PASSTHRU 445 +#define ETC2_RGB8_PTA 448 +#define ETC2_SRGB8_PTA 449 +#define ETC2_EAC_RGBA8 450 +#define ETC2_EAC_SRGB8_A8 451 +#define R8G8B8_UINT 456 +#define R8G8B8_SINT 457 +#define RAW 511 + +/* Enum Shader Channel Select */ +#define SCS_ZERO 0 +#define SCS_ONE 1 +#define SCS_RED 4 +#define SCS_GREEN 5 +#define SCS_BLUE 6 +#define SCS_ALPHA 7 + +/* Enum Clear Color */ +#define CC_ZERO 0 +#define CC_ONE 1 + +/* Enum Texture Coordinate Mode */ +#define TCM_WRAP 0 +#define TCM_MIRROR 1 +#define TCM_CLAMP 2 +#define TCM_CUBE 3 +#define TCM_CLAMP_BORDER 4 +#define TCM_MIRROR_ONCE 5 +#define TCM_HALF_BORDER 6 
+ diff --git a/src/vulkan/glsl_scraper.py b/src/vulkan/glsl_scraper.py new file mode 100644 index 00000000000..d1514712762 --- /dev/null +++ b/src/vulkan/glsl_scraper.py @@ -0,0 +1,274 @@ +#! /usr/bin/env python + +import argparse +import cStringIO +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +from textwrap import dedent + +class Shader: + def __init__(self, stage): + self.stream = cStringIO.StringIO() + self.stage = stage + + if self.stage == 'VERTEX': + self.ext = 'vert' + elif self.stage == 'TESS_CONTROL': + self.ext = 'tesc' + elif self.stage == 'TESS_EVALUATION': + self.ext = 'tese' + elif self.stage == 'GEOMETRY': + self.ext = 'geom' + elif self.stage == 'FRAGMENT': + self.ext = 'frag' + elif self.stage == 'COMPUTE': + self.ext = 'comp' + else: + assert False + + def add_text(self, s): + self.stream.write(s) + + def finish_text(self, line): + self.line = line + + def glsl_source(self): + return self.stream.getvalue() + + def compile(self): + # We can assume if we got here that we have a temp directory and that + # we're currently living in it. 
+ glsl_fname = 'shader{0}.{1}'.format(self.line, self.ext) + spirv_fname = self.ext + '.spv' + + glsl_file = open(glsl_fname, 'w') + glsl_file.write('#version 420 core\n') + glsl_file.write(self.glsl_source()) + glsl_file.close() + + out = open('glslang.out', 'wb') + err = subprocess.call([glslang, '-V', glsl_fname], stdout=out) + if err != 0: + out = open('glslang.out', 'r') + sys.stderr.write(out.read()) + out.close() + exit(1) + + def dwords(f): + while True: + dword_str = f.read(4) + if not dword_str: + return + assert len(dword_str) == 4 + yield struct.unpack('I', dword_str)[0] + + spirv_file = open(spirv_fname, 'rb') + self.dwords = list(dwords(spirv_file)) + spirv_file.close() + + os.remove(glsl_fname) + os.remove(spirv_fname) + + def dump_c_code(self, f, glsl_only = False): + f.write('\n\n') + var_prefix = '_glsl_helpers_shader{0}'.format(self.line) + + # First dump the GLSL source as strings + f.write('static const char {0}_glsl_src[] ='.format(var_prefix)) + f.write('\n_ANV_SPIRV_' + self.stage) + f.write('\n"#version 330\\n"') + for line in self.glsl_source().splitlines(): + if not line.strip(): + continue + f.write('\n"{0}\\n"'.format(line)) + f.write(';\n\n') + + if glsl_only: + return + + # Now dump the SPIR-V source + f.write('static const uint32_t {0}_spir_v_src[] = {{'.format(var_prefix)) + line_start = 0 + while line_start < len(self.dwords): + f.write('\n ') + for i in range(line_start, min(line_start + 6, len(self.dwords))): + f.write(' 0x{:08x},'.format(self.dwords[i])) + line_start += 6 + f.write('\n};\n') + +token_exp = re.compile(r'(GLSL_VK_SHADER_MODULE|\(|\)|,)') + +class Parser: + def __init__(self, f): + self.infile = f + self.paren_depth = 0 + self.shader = None + self.line_number = 1 + self.shaders = [] + + def tokenize(f): + leftover = '' + for line in f: + pos = 0 + while True: + m = token_exp.search(line, pos) + if m: + if m.start() > pos: + leftover += line[pos:m.start()] + pos = m.end() + + if leftover: + yield leftover + leftover 
= '' + + yield m.group(0) + + else: + leftover += line[pos:] + break + + self.line_number += 1 + + if leftover: + yield leftover + + self.token_iter = tokenize(self.infile) + + def handle_shader_src(self): + paren_depth = 1 + for t in self.token_iter: + if t == '(': + paren_depth += 1 + elif t == ')': + paren_depth -= 1 + if paren_depth == 0: + return + + self.current_shader.add_text(t) + + def handle_macro(self): + t = self.token_iter.next() + assert t == '(' + t = self.token_iter.next() + t = self.token_iter.next() + assert t == ',' + + stage = self.token_iter.next().strip() + + t = self.token_iter.next() + assert t == ',' + + self.current_shader = Shader(stage) + self.handle_shader_src() + self.current_shader.finish_text(self.line_number) + + self.shaders.append(self.current_shader) + self.current_shader = None + + def run(self): + for t in self.token_iter: + if t == 'GLSL_VK_SHADER_MODULE': + self.handle_macro() + +def open_file(name, mode): + if name == '-': + if mode == 'w': + return sys.stdout + elif mode == 'r': + return sys.stdin + else: + assert False + else: + return open(name, mode) + +def parse_args(): + description = dedent("""\ + This program scrapes a C file for any instance of the + GLSL_VK_SHADER_MODULE macro, grabs the GLSL source code, compiles it + to SPIR-V. The resulting SPIR-V code is written to another C file as + an array of 32-bit words. 
+ + If '-' is passed as the input file or output file, stdin or stdout will be + used instead of a file on disc.""") + + p = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-o', '--outfile', default='-', + help='Output to the given file (default: stdout).') + p.add_argument('--with-glslang', metavar='PATH', + default='glslangValidator', + dest='glslang', + help='Full path to the glslangValidator program.') + p.add_argument('--glsl-only', action='store_true') + p.add_argument('infile', metavar='INFILE') + + return p.parse_args() + + +args = parse_args() +infname = args.infile +outfname = args.outfile +glslang = args.glslang +glsl_only = args.glsl_only + +with open_file(infname, 'r') as infile: + parser = Parser(infile) + parser.run() + +if not glsl_only: + # glslangValidator has an absolutely *insane* interface. We pretty much + # have to run in a temporary directory. Sad day. + current_dir = os.getcwd() + tmpdir = tempfile.mkdtemp('glsl_scraper') + + try: + os.chdir(tmpdir) + + for shader in parser.shaders: + shader.compile() + + os.chdir(current_dir) + finally: + shutil.rmtree(tmpdir) + +with open_file(outfname, 'w') as outfile: + outfile.write(dedent("""\ + /* =========================== DO NOT EDIT! =========================== + * This file is autogenerated by glsl_scraper.py. 
+ */ + + #include <stdint.h> + + #define _ANV_SPIRV_MAGIC "\\x03\\x02\\x23\\x07\\0\\0\\0\\0" + + #define _ANV_SPIRV_VERTEX _ANV_SPIRV_MAGIC "\\0\\0\\0\\0" + #define _ANV_SPIRV_TESS_CONTROL _ANV_SPIRV_MAGIC "\\1\\0\\0\\0" + #define _ANV_SPIRV_TESS_EVALUATION _ANV_SPIRV_MAGIC "\\2\\0\\0\\0" + #define _ANV_SPIRV_GEOMETRY _ANV_SPIRV_MAGIC "\\3\\0\\0\\0" + #define _ANV_SPIRV_FRAGMENT _ANV_SPIRV_MAGIC "\\4\\0\\0\\0" + #define _ANV_SPIRV_COMPUTE _ANV_SPIRV_MAGIC "\\5\\0\\0\\0" + + #define _ANV_GLSL_SRC_VAR2(_line) _glsl_helpers_shader ## _line ## _glsl_src + #define _ANV_GLSL_SRC_VAR(_line) _ANV_GLSL_SRC_VAR2(_line) + + #define GLSL_VK_SHADER_MODULE(device, stage, ...) ({ \\ + VkShaderModule __module; \\ + VkShaderModuleCreateInfo __shader_create_info = { \\ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, \\ + .codeSize = sizeof(_ANV_GLSL_SRC_VAR(__LINE__)), \\ + .pCode = _ANV_GLSL_SRC_VAR(__LINE__), \\ + }; \\ + vkCreateShaderModule(anv_device_to_handle(device), \\ + &__shader_create_info, &__module); \\ + __module; \\ + }) + """)) + + for shader in parser.shaders: + shader.dump_c_code(outfile, glsl_only) diff --git a/src/vulkan/tests/.gitignore b/src/vulkan/tests/.gitignore new file mode 100644 index 00000000000..9f4be5270f6 --- /dev/null +++ b/src/vulkan/tests/.gitignore @@ -0,0 +1,4 @@ +block_pool +state_pool +state_pool_free_list_only +state_pool_no_free diff --git a/src/vulkan/tests/Makefile.am b/src/vulkan/tests/Makefile.am new file mode 100644 index 00000000000..7b15bb002be --- /dev/null +++ b/src/vulkan/tests/Makefile.am @@ -0,0 +1,45 @@ +# Copyright © 2009 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# 
the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +AM_CPPFLAGS = \ + $(INTEL_CFLAGS) \ + $(VALGRIND_CFLAGS) \ + $(DEFINES) \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa \ + -I$(top_srcdir)/src/mesa/drivers/dri/common \ + -I$(top_srcdir)/src/mesa/drivers/dri/i965 \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/vulkan + +LDADD = \ + $(top_builddir)/src/vulkan/libvulkan-test.la \ + $(PTHREAD_LIBS) -lm -lstdc++ + +check_PROGRAMS = \ + block_pool_no_free \ + state_pool_no_free \ + state_pool_free_list_only \ + state_pool + +TESTS = $(check_PROGRAMS) diff --git a/src/vulkan/tests/block_pool_no_free.c b/src/vulkan/tests/block_pool_no_free.c new file mode 100644 index 00000000000..898a82b0909 --- /dev/null +++ b/src/vulkan/tests/block_pool_no_free.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define BLOCKS_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_block_pool *pool; + uint32_t blocks[BLOCKS_PER_THREAD]; +} jobs[NUM_THREADS]; + +static void *alloc_blocks(void *_job) +{ + struct job *job = _job; + + for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) + job->blocks[i] = anv_block_pool_alloc(job->pool); + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool pool; + + anv_block_pool_init(&pool, &device, 16); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_blocks, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= BLOCKS_PER_THREAD) + continue; + + if (thread_max < jobs[i].blocks[next[i]]) { + thread_max = 
jobs[i].blocks[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].blocks[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].blocks[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_block_pool_finish(&pool); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool.c b/src/vulkan/tests/state_pool.c new file mode 100644 index 00000000000..e235ee9b394 --- /dev/null +++ b/src/vulkan/tests/state_pool.c @@ -0,0 +1,53 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 10 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) +#define NUM_RUNS 64 + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + for (unsigned i = 0; i < NUM_RUNS; i++) { + anv_block_pool_init(&block_pool, &device, 256); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); + } +} diff --git a/src/vulkan/tests/state_pool_free_list_only.c b/src/vulkan/tests/state_pool_free_list_only.c new file mode 100644 index 00000000000..9e89cf6425f --- /dev/null +++ b/src/vulkan/tests/state_pool_free_list_only.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 8 +#define STATES_PER_THREAD_LOG2 12 +#define STATES_PER_THREAD (1 << STATES_PER_THREAD_LOG2) + +#include "state_pool_test_helper.h" + +int main(int argc, char **argv) +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + anv_block_pool_init(&block_pool, &device, 4096); + anv_state_pool_init(&state_pool, &block_pool); + + /* Grab one so a zero offset is impossible */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab and return enough states that the state pool test below won't + * actually ever resize anything. + */ + { + struct anv_state states[NUM_THREADS * STATES_PER_THREAD]; + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) { + states[i] = anv_state_pool_alloc(&state_pool, 16, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < NUM_THREADS * STATES_PER_THREAD; i++) + anv_state_pool_free(&state_pool, states[i]); + } + + run_state_pool_test(&state_pool); + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); +} diff --git a/src/vulkan/tests/state_pool_no_free.c b/src/vulkan/tests/state_pool_no_free.c new file mode 100644 index 00000000000..4b3ca78974f --- /dev/null +++ b/src/vulkan/tests/state_pool_no_free.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies 
of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +#include "anv_private.h" + +#define NUM_THREADS 16 +#define STATES_PER_THREAD 1024 +#define NUM_RUNS 64 + +struct job { + pthread_t thread; + unsigned id; + struct anv_state_pool *pool; + uint32_t offsets[STATES_PER_THREAD]; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *_job) +{ + struct job *job = _job; + + pthread_barrier_wait(&barrier); + + for (unsigned i = 0; i < STATES_PER_THREAD; i++) { + struct anv_state state = anv_state_pool_alloc(job->pool, 16, 16); + job->offsets[i] = state.offset; + } + + return NULL; +} + +static void run_test() +{ + struct anv_device device; + struct anv_block_pool block_pool; + struct anv_state_pool state_pool; + + anv_block_pool_init(&block_pool, &device, 64); + anv_state_pool_init(&state_pool, &block_pool); + + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = &state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); + + /* A list of indices, one per thread */ + unsigned next[NUM_THREADS]; + 
memset(next, 0, sizeof(next)); + + int highest = -1; + while (true) { + /* First, we find which thread has the highest next element */ + int thread_max = -1; + int max_thread_idx = -1; + for (unsigned i = 0; i < NUM_THREADS; i++) { + if (next[i] >= STATES_PER_THREAD) + continue; + + if (thread_max < jobs[i].offsets[next[i]]) { + thread_max = jobs[i].offsets[next[i]]; + max_thread_idx = i; + } + } + + /* The only way this can happen is if all of the next[] values are at + * BLOCKS_PER_THREAD, in which case, we're done. + */ + if (thread_max == -1) + break; + + /* That next element had better be higher than the previous highest */ + assert(jobs[max_thread_idx].offsets[next[max_thread_idx]] > highest); + + highest = jobs[max_thread_idx].offsets[next[max_thread_idx]]; + next[max_thread_idx]++; + } + + anv_state_pool_finish(&state_pool); + anv_block_pool_finish(&block_pool); +} + +int main(int argc, char **argv) +{ + for (unsigned i = 0; i < NUM_RUNS; i++) + run_test(); +} diff --git a/src/vulkan/tests/state_pool_test_helper.h b/src/vulkan/tests/state_pool_test_helper.h new file mode 100644 index 00000000000..0e56431303f --- /dev/null +++ b/src/vulkan/tests/state_pool_test_helper.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <pthread.h> + +struct job { + struct anv_state_pool *pool; + unsigned id; + pthread_t thread; +} jobs[NUM_THREADS]; + +pthread_barrier_t barrier; + +static void *alloc_states(void *void_job) +{ + struct job *job = void_job; + + const unsigned chunk_size = 1 << (job->id % STATES_PER_THREAD_LOG2); + const unsigned num_chunks = STATES_PER_THREAD / chunk_size; + + struct anv_state states[chunk_size]; + + pthread_barrier_wait(&barrier); + + for (unsigned c = 0; c < num_chunks; c++) { + for (unsigned i = 0; i < chunk_size; i++) { + states[i] = anv_state_pool_alloc(job->pool, 16, 16); + memset(states[i].map, 139, 16); + assert(states[i].offset != 0); + } + + for (unsigned i = 0; i < chunk_size; i++) + anv_state_pool_free(job->pool, states[i]); + } + + return NULL; +} + +static void run_state_pool_test(struct anv_state_pool *state_pool) +{ + pthread_barrier_init(&barrier, NULL, NUM_THREADS); + + for (unsigned i = 0; i < NUM_THREADS; i++) { + jobs[i].pool = state_pool; + jobs[i].id = i; + pthread_create(&jobs[i].thread, NULL, alloc_states, &jobs[i]); + } + + for (unsigned i = 0; i < NUM_THREADS; i++) + pthread_join(jobs[i].thread, NULL); +} |